Remove ppc64le assembly.
We no longer have a need to support ppc64le, nor do we have any testing
story for the assembly we previously had. Remove all ppc64le-specific
assembly.
This CL stops short of removing it from base.h. That'll be done in a
follow-up CL, just to separate which removals are for the assembly and
which removals remove all support.
Update-Note: After this change, ppc64le builds drop assembly
optimizations and will fall back to a generic C-based AES implementation.
Change-Id: Ic8075638085761d66cebc276eb16c4770ce03920
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/56388
Commit-Queue: David Benjamin <davidben@google.com>
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/cmake/perlasm.cmake b/cmake/perlasm.cmake
index b9530cf..9828799 100644
--- a/cmake/perlasm.cmake
+++ b/cmake/perlasm.cmake
@@ -17,7 +17,6 @@
DEPENDS
${src}
${PROJECT_SOURCE_DIR}/crypto/perlasm/arm-xlate.pl
- ${PROJECT_SOURCE_DIR}/crypto/perlasm/ppc-xlate.pl
${PROJECT_SOURCE_DIR}/crypto/perlasm/x86_64-xlate.pl
${PROJECT_SOURCE_DIR}/crypto/perlasm/x86asm.pl
${PROJECT_SOURCE_DIR}/crypto/perlasm/x86gas.pl
@@ -41,9 +40,6 @@
add_perlasm_target("${dest}-apple.S" ${src} ios32)
add_perlasm_target("${dest}-linux.S" ${src} linux32)
append_to_parent_scope("${var}_ASM" "${dest}-apple.S" "${dest}-linux.S")
- elseif(arch STREQUAL "ppc64le")
- add_perlasm_target("${dest}-linux.S" ${src} linux64le)
- append_to_parent_scope("${var}_ASM" "${dest}-linux.S")
elseif(arch STREQUAL "x86")
add_perlasm_target("${dest}-apple.S" ${src} macosx -fPIC -DOPENSSL_IA32_SSE2)
add_perlasm_target("${dest}-linux.S" ${src} elf -fPIC -DOPENSSL_IA32_SSE2)
diff --git a/crypto/CMakeLists.txt b/crypto/CMakeLists.txt
index e1634a4..ec6d2ee 100644
--- a/crypto/CMakeLists.txt
+++ b/crypto/CMakeLists.txt
@@ -24,7 +24,6 @@
perlasm(CRYPTO_SOURCES aarch64 test/trampoline-armv8 test/asm/trampoline-armv8.pl)
perlasm(CRYPTO_SOURCES arm chacha/chacha-armv4 chacha/asm/chacha-armv4.pl)
perlasm(CRYPTO_SOURCES arm test/trampoline-armv4 test/asm/trampoline-armv4.pl)
-perlasm(CRYPTO_SOURCES ppc64le test/trampoline-ppc test/asm/trampoline-ppc.pl)
perlasm(CRYPTO_SOURCES x86 chacha/chacha-x86 chacha/asm/chacha-x86.pl)
perlasm(CRYPTO_SOURCES x86 test/trampoline-x86 test/asm/trampoline-x86.pl)
perlasm(CRYPTO_SOURCES x86_64 chacha/chacha-x86_64 chacha/asm/chacha-x86_64.pl)
@@ -135,7 +134,6 @@
cpu_arm_linux.c
cpu_arm.c
cpu_intel.c
- cpu_ppc64le.c
crypto.c
curve25519/curve25519.c
curve25519/spake25519.c
diff --git a/crypto/abi_self_test.cc b/crypto/abi_self_test.cc
index 9681498..a42bd1d 100644
--- a/crypto/abi_self_test.cc
+++ b/crypto/abi_self_test.cc
@@ -521,289 +521,3 @@
CHECK_ABI_NO_UNWIND(abi_test_clobber_v15_upper);
}
#endif // OPENSSL_AARCH64 && SUPPORTS_ABI_TEST
-
-#if defined(OPENSSL_PPC64LE) && defined(SUPPORTS_ABI_TEST)
-extern "C" {
-void abi_test_clobber_r0(void);
-// r1 is the stack pointer.
-void abi_test_clobber_r2(void);
-void abi_test_clobber_r3(void);
-void abi_test_clobber_r4(void);
-void abi_test_clobber_r5(void);
-void abi_test_clobber_r6(void);
-void abi_test_clobber_r7(void);
-void abi_test_clobber_r8(void);
-void abi_test_clobber_r9(void);
-void abi_test_clobber_r10(void);
-void abi_test_clobber_r11(void);
-void abi_test_clobber_r12(void);
-// r13 is the thread pointer.
-void abi_test_clobber_r14(void);
-void abi_test_clobber_r15(void);
-void abi_test_clobber_r16(void);
-void abi_test_clobber_r17(void);
-void abi_test_clobber_r18(void);
-void abi_test_clobber_r19(void);
-void abi_test_clobber_r20(void);
-void abi_test_clobber_r21(void);
-void abi_test_clobber_r22(void);
-void abi_test_clobber_r23(void);
-void abi_test_clobber_r24(void);
-void abi_test_clobber_r25(void);
-void abi_test_clobber_r26(void);
-void abi_test_clobber_r27(void);
-void abi_test_clobber_r28(void);
-void abi_test_clobber_r29(void);
-void abi_test_clobber_r30(void);
-void abi_test_clobber_r31(void);
-
-void abi_test_clobber_f0(void);
-void abi_test_clobber_f1(void);
-void abi_test_clobber_f2(void);
-void abi_test_clobber_f3(void);
-void abi_test_clobber_f4(void);
-void abi_test_clobber_f5(void);
-void abi_test_clobber_f6(void);
-void abi_test_clobber_f7(void);
-void abi_test_clobber_f8(void);
-void abi_test_clobber_f9(void);
-void abi_test_clobber_f10(void);
-void abi_test_clobber_f11(void);
-void abi_test_clobber_f12(void);
-void abi_test_clobber_f13(void);
-void abi_test_clobber_f14(void);
-void abi_test_clobber_f15(void);
-void abi_test_clobber_f16(void);
-void abi_test_clobber_f17(void);
-void abi_test_clobber_f18(void);
-void abi_test_clobber_f19(void);
-void abi_test_clobber_f20(void);
-void abi_test_clobber_f21(void);
-void abi_test_clobber_f22(void);
-void abi_test_clobber_f23(void);
-void abi_test_clobber_f24(void);
-void abi_test_clobber_f25(void);
-void abi_test_clobber_f26(void);
-void abi_test_clobber_f27(void);
-void abi_test_clobber_f28(void);
-void abi_test_clobber_f29(void);
-void abi_test_clobber_f30(void);
-void abi_test_clobber_f31(void);
-
-void abi_test_clobber_v0(void);
-void abi_test_clobber_v1(void);
-void abi_test_clobber_v2(void);
-void abi_test_clobber_v3(void);
-void abi_test_clobber_v4(void);
-void abi_test_clobber_v5(void);
-void abi_test_clobber_v6(void);
-void abi_test_clobber_v7(void);
-void abi_test_clobber_v8(void);
-void abi_test_clobber_v9(void);
-void abi_test_clobber_v10(void);
-void abi_test_clobber_v11(void);
-void abi_test_clobber_v12(void);
-void abi_test_clobber_v13(void);
-void abi_test_clobber_v14(void);
-void abi_test_clobber_v15(void);
-void abi_test_clobber_v16(void);
-void abi_test_clobber_v17(void);
-void abi_test_clobber_v18(void);
-void abi_test_clobber_v19(void);
-void abi_test_clobber_v20(void);
-void abi_test_clobber_v21(void);
-void abi_test_clobber_v22(void);
-void abi_test_clobber_v23(void);
-void abi_test_clobber_v24(void);
-void abi_test_clobber_v25(void);
-void abi_test_clobber_v26(void);
-void abi_test_clobber_v27(void);
-void abi_test_clobber_v28(void);
-void abi_test_clobber_v29(void);
-void abi_test_clobber_v30(void);
-void abi_test_clobber_v31(void);
-
-void abi_test_clobber_cr0(void);
-void abi_test_clobber_cr1(void);
-void abi_test_clobber_cr2(void);
-void abi_test_clobber_cr3(void);
-void abi_test_clobber_cr4(void);
-void abi_test_clobber_cr5(void);
-void abi_test_clobber_cr6(void);
-void abi_test_clobber_cr7(void);
-
-void abi_test_clobber_ctr(void);
-void abi_test_clobber_lr(void);
-
-} // extern "C"
-
-TEST(ABITest, PPC64LE) {
- // abi_test_trampoline hides unsaved registers from the caller, so we can
- // safely call the abi_test_clobber_* functions below.
- abi_test::internal::CallerState state;
- RAND_bytes(reinterpret_cast<uint8_t *>(&state), sizeof(state));
- CHECK_ABI_NO_UNWIND(abi_test_trampoline,
- reinterpret_cast<crypto_word_t>(abi_test_clobber_r14),
- &state, nullptr, 0, 0 /* no breakpoint */);
-
- CHECK_ABI_NO_UNWIND(abi_test_clobber_r0);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_r2);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_r3);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_r4);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_r5);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_r6);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_r7);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_r8);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_r9);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_r10);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_r11);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_r12);
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r14),
- "r14 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r15),
- "r15 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r16),
- "r16 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r17),
- "r17 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r18),
- "r18 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r19),
- "r19 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r20),
- "r20 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r21),
- "r21 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r22),
- "r22 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r23),
- "r23 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r24),
- "r24 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r25),
- "r25 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r26),
- "r26 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r27),
- "r27 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r28),
- "r28 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r29),
- "r29 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r30),
- "r30 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r31),
- "r31 was not restored after return");
-
- CHECK_ABI_NO_UNWIND(abi_test_clobber_f0);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_f1);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_f2);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_f3);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_f4);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_f5);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_f6);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_f7);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_f8);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_f9);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_f10);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_f11);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_f12);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_f13);
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f14),
- "f14 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f15),
- "f15 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f16),
- "f16 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f17),
- "f17 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f18),
- "f18 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f19),
- "f19 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f20),
- "f20 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f21),
- "f21 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f22),
- "f22 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f23),
- "f23 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f24),
- "f24 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f25),
- "f25 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f26),
- "f26 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f27),
- "f27 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f28),
- "f28 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f29),
- "f29 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f30),
- "f30 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f31),
- "f31 was not restored after return");
-
- CHECK_ABI_NO_UNWIND(abi_test_clobber_v0);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_v1);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_v2);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_v3);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_v4);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_v5);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_v6);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_v7);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_v8);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_v9);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_v10);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_v11);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_v12);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_v13);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_v14);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_v15);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_v16);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_v17);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_v18);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_v19);
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v20),
- "v20 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v21),
- "v21 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v22),
- "v22 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v23),
- "v23 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v24),
- "v24 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v25),
- "v25 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v26),
- "v26 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v27),
- "v27 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v28),
- "v28 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v29),
- "v29 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v30),
- "v30 was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v31),
- "v31 was not restored after return");
-
- CHECK_ABI_NO_UNWIND(abi_test_clobber_cr0);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_cr1);
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_cr2),
- "cr was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_cr3),
- "cr was not restored after return");
- EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_cr4),
- "cr was not restored after return");
- CHECK_ABI_NO_UNWIND(abi_test_clobber_cr5);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_cr6);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_cr7);
-
- CHECK_ABI_NO_UNWIND(abi_test_clobber_ctr);
- CHECK_ABI_NO_UNWIND(abi_test_clobber_lr);
-}
-#endif // OPENSSL_PPC64LE && SUPPORTS_ABI_TEST
diff --git a/crypto/cpu_ppc64le.c b/crypto/cpu_ppc64le.c
deleted file mode 100644
index a802e37..0000000
--- a/crypto/cpu_ppc64le.c
+++ /dev/null
@@ -1,38 +0,0 @@
-/* Copyright (c) 2016, Google Inc.
- *
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
- * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
-
-#include <openssl/base.h>
-
-#if defined(OPENSSL_PPC64LE)
-
-#include <sys/auxv.h>
-
-#include "internal.h"
-
-
-#if !defined(PPC_FEATURE2_HAS_VCRYPTO)
-// PPC_FEATURE2_HAS_VCRYPTO was taken from section 4.1.2.3 of the “OpenPOWER
-// ABI for Linux Supplement”.
-#define PPC_FEATURE2_HAS_VCRYPTO 0x02000000
-#endif
-
-void OPENSSL_cpuid_setup(void) {
- OPENSSL_ppc64le_hwcap2 = getauxval(AT_HWCAP2);
-}
-
-int CRYPTO_is_PPC64LE_vcrypto_capable(void) {
- return (OPENSSL_ppc64le_hwcap2 & PPC_FEATURE2_HAS_VCRYPTO) != 0;
-}
-
-#endif // OPENSSL_PPC64LE
diff --git a/crypto/crypto.c b/crypto/crypto.c
index 12cbb888..beaae0f 100644
--- a/crypto/crypto.c
+++ b/crypto/crypto.c
@@ -25,12 +25,10 @@
"ossl_ssize_t should be the same size as size_t");
#if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_STATIC_ARMCAP) && \
- (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \
- defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64) || \
- defined(OPENSSL_PPC64LE))
-// x86, x86_64, the ARMs and ppc64le need to record the result of a
-// cpuid/getauxval call for the asm to work correctly, unless compiled without
-// asm code.
+ (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \
+ defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64))
+// x86, x86_64, and the ARMs need to record the result of a cpuid/getauxval call
+// for the asm to work correctly, unless compiled without asm code.
#define NEED_CPUID
#else
@@ -41,8 +39,7 @@
#define BORINGSSL_NO_STATIC_INITIALIZER
#endif
-#endif // !NO_ASM && !STATIC_ARMCAP &&
- // (X86 || X86_64 || ARM || AARCH64 || PPC64LE)
+#endif // !NO_ASM && !STATIC_ARMCAP && (X86 || X86_64 || ARM || AARCH64)
// Our assembly does not use the GOT to reference symbols, which means
@@ -81,10 +78,6 @@
// This value must be explicitly initialized to zero. See similar comment above.
HIDDEN uint32_t OPENSSL_ia32cap_P[4] = {0};
-#elif defined(OPENSSL_PPC64LE)
-
-HIDDEN unsigned long OPENSSL_ppc64le_hwcap2 = 0;
-
#elif defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)
#include <openssl/arm_arch.h>
diff --git a/crypto/fipsmodule/CMakeLists.txt b/crypto/fipsmodule/CMakeLists.txt
index 82505b7..2bfadab 100644
--- a/crypto/fipsmodule/CMakeLists.txt
+++ b/crypto/fipsmodule/CMakeLists.txt
@@ -20,8 +20,6 @@
perlasm(BCM_SOURCES arm sha256-armv4 sha/asm/sha256-armv4.pl)
perlasm(BCM_SOURCES arm sha512-armv4 sha/asm/sha512-armv4.pl)
perlasm(BCM_SOURCES arm vpaes-armv7 aes/asm/vpaes-armv7.pl)
-perlasm(BCM_SOURCES ppc64le aesp8-ppc aes/asm/aesp8-ppc.pl)
-perlasm(BCM_SOURCES ppc64le ghashp8-ppc modes/asm/ghashp8-ppc.pl)
perlasm(BCM_SOURCES x86 aesni-x86 aes/asm/aesni-x86.pl)
perlasm(BCM_SOURCES x86 bn-586 bn/asm/bn-586.pl)
perlasm(BCM_SOURCES x86 co-586 bn/asm/co-586.pl)
diff --git a/crypto/fipsmodule/aes/asm/aesp8-ppc.pl b/crypto/fipsmodule/aes/asm/aesp8-ppc.pl
deleted file mode 100644
index 061f6b7..0000000
--- a/crypto/fipsmodule/aes/asm/aesp8-ppc.pl
+++ /dev/null
@@ -1,3809 +0,0 @@
-#! /usr/bin/env perl
-# Copyright 2014-2018 The OpenSSL Project Authors. All Rights Reserved.
-#
-# Licensed under the OpenSSL license (the "License"). You may not use
-# this file except in compliance with the License. You can obtain a copy
-# in the file LICENSE in the source distribution or at
-# https://www.openssl.org/source/license.html
-
-#
-# ====================================================================
-# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-#
-# This module implements support for AES instructions as per PowerISA
-# specification version 2.07, first implemented by POWER8 processor.
-# The module is endian-agnostic in sense that it supports both big-
-# and little-endian cases. Data alignment in parallelizable modes is
-# handled with VSX loads and stores, which implies MSR.VSX flag being
-# set. It should also be noted that ISA specification doesn't prohibit
-# alignment exceptions for these instructions on page boundaries.
-# Initially alignment was handled in pure AltiVec/VMX way [when data
-# is aligned programmatically, which in turn guarantees exception-
-# free execution], but it turned to hamper performance when vcipher
-# instructions are interleaved. It's reckoned that eventual
-# misalignment penalties at page boundaries are in average lower
-# than additional overhead in pure AltiVec approach.
-#
-# May 2016
-#
-# Add XTS subroutine, 9x on little- and 12x improvement on big-endian
-# systems were measured.
-#
-######################################################################
-# Current large-block performance in cycles per byte processed with
-# 128-bit key (less is better).
-#
-# CBC en-/decrypt CTR XTS
-# POWER8[le] 3.96/0.72 0.74 1.1
-# POWER8[be] 3.75/0.65 0.66 1.0
-# POWER9[le] 4.02/0.86 0.84 1.05
-# POWER9[be] 3.99/0.78 0.79 0.97
-
-$flavour = shift;
-$output = shift;
-
-if ($flavour =~ /64/) {
- $SIZE_T =8;
- $LRSAVE =2*$SIZE_T;
- $STU ="stdu";
- $POP ="ld";
- $PUSH ="std";
- $UCMP ="cmpld";
- $SHL ="sldi";
-} elsif ($flavour =~ /32/) {
- $SIZE_T =4;
- $LRSAVE =$SIZE_T;
- $STU ="stwu";
- $POP ="lwz";
- $PUSH ="stw";
- $UCMP ="cmplw";
- $SHL ="slwi";
-} else { die "nonsense $flavour"; }
-
-$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
-
-$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
-( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
-( $xlate="${dir}../../../perlasm/ppc-xlate.pl" and -f $xlate) or
-die "can't locate ppc-xlate.pl";
-
-open OUT,"| $^X \"$xlate\" $flavour \"$output\"" || die "can't call $xlate: $!";
-*STDOUT=*OUT;
-
-$FRAME=8*$SIZE_T;
-$prefix="aes_hw";
-
-$sp="r1";
-$vrsave="r12";
-
-#########################################################################
-{{{ # Key setup procedures #
-my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
-my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
-my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
-
-$code.=<<___;
-.machine "any"
-
-.text
-
-.align 7
-Lrcon:
-.long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev
-.long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev
-.long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev
-.long 0,0,0,0 ?asis
-Lconsts:
- mflr r0
- bcl 20,31,\$+4
- mflr $ptr #vvvvv "distance between . and rcon
- addi $ptr,$ptr,-0x48
- mtlr r0
- blr
- .long 0
- .byte 0,12,0x14,0,0,0,0,0
-.asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
-
-.globl .${prefix}_set_encrypt_key
-.align 5
-.${prefix}_set_encrypt_key:
-Lset_encrypt_key:
- mflr r11
- $PUSH r11,$LRSAVE($sp)
-
- li $ptr,-1
- ${UCMP}i $inp,0
- beq- Lenc_key_abort # if ($inp==0) return -1;
- ${UCMP}i $out,0
- beq- Lenc_key_abort # if ($out==0) return -1;
- li $ptr,-2
- cmpwi $bits,128
- blt- Lenc_key_abort
- cmpwi $bits,256
- bgt- Lenc_key_abort
- andi. r0,$bits,0x3f
- bne- Lenc_key_abort
-
- lis r0,0xfff0
- mfspr $vrsave,256
- mtspr 256,r0
-
- bl Lconsts
- mtlr r11
-
- neg r9,$inp
- lvx $in0,0,$inp
- addi $inp,$inp,15 # 15 is not typo
- lvsr $key,0,r9 # borrow $key
- li r8,0x20
- cmpwi $bits,192
- lvx $in1,0,$inp
- le?vspltisb $mask,0x0f # borrow $mask
- lvx $rcon,0,$ptr
- le?vxor $key,$key,$mask # adjust for byte swap
- lvx $mask,r8,$ptr
- addi $ptr,$ptr,0x10
- vperm $in0,$in0,$in1,$key # align [and byte swap in LE]
- li $cnt,8
- vxor $zero,$zero,$zero
- mtctr $cnt
-
- ?lvsr $outperm,0,$out
- vspltisb $outmask,-1
- lvx $outhead,0,$out
- ?vperm $outmask,$zero,$outmask,$outperm
-
- blt Loop128
- addi $inp,$inp,8
- beq L192
- addi $inp,$inp,8
- b L256
-
-.align 4
-Loop128:
- vperm $key,$in0,$in0,$mask # rotate-n-splat
- vsldoi $tmp,$zero,$in0,12 # >>32
- vperm $outtail,$in0,$in0,$outperm # rotate
- vsel $stage,$outhead,$outtail,$outmask
- vmr $outhead,$outtail
- vcipherlast $key,$key,$rcon
- stvx $stage,0,$out
- addi $out,$out,16
-
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in0,$in0,$tmp
- vadduwm $rcon,$rcon,$rcon
- vxor $in0,$in0,$key
- bdnz Loop128
-
- lvx $rcon,0,$ptr # last two round keys
-
- vperm $key,$in0,$in0,$mask # rotate-n-splat
- vsldoi $tmp,$zero,$in0,12 # >>32
- vperm $outtail,$in0,$in0,$outperm # rotate
- vsel $stage,$outhead,$outtail,$outmask
- vmr $outhead,$outtail
- vcipherlast $key,$key,$rcon
- stvx $stage,0,$out
- addi $out,$out,16
-
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in0,$in0,$tmp
- vadduwm $rcon,$rcon,$rcon
- vxor $in0,$in0,$key
-
- vperm $key,$in0,$in0,$mask # rotate-n-splat
- vsldoi $tmp,$zero,$in0,12 # >>32
- vperm $outtail,$in0,$in0,$outperm # rotate
- vsel $stage,$outhead,$outtail,$outmask
- vmr $outhead,$outtail
- vcipherlast $key,$key,$rcon
- stvx $stage,0,$out
- addi $out,$out,16
-
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in0,$in0,$tmp
- vxor $in0,$in0,$key
- vperm $outtail,$in0,$in0,$outperm # rotate
- vsel $stage,$outhead,$outtail,$outmask
- vmr $outhead,$outtail
- stvx $stage,0,$out
-
- addi $inp,$out,15 # 15 is not typo
- addi $out,$out,0x50
-
- li $rounds,10
- b Ldone
-
-.align 4
-L192:
- lvx $tmp,0,$inp
- li $cnt,4
- vperm $outtail,$in0,$in0,$outperm # rotate
- vsel $stage,$outhead,$outtail,$outmask
- vmr $outhead,$outtail
- stvx $stage,0,$out
- addi $out,$out,16
- vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
- vspltisb $key,8 # borrow $key
- mtctr $cnt
- vsububm $mask,$mask,$key # adjust the mask
-
-Loop192:
- vperm $key,$in1,$in1,$mask # roate-n-splat
- vsldoi $tmp,$zero,$in0,12 # >>32
- vcipherlast $key,$key,$rcon
-
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in0,$in0,$tmp
-
- vsldoi $stage,$zero,$in1,8
- vspltw $tmp,$in0,3
- vxor $tmp,$tmp,$in1
- vsldoi $in1,$zero,$in1,12 # >>32
- vadduwm $rcon,$rcon,$rcon
- vxor $in1,$in1,$tmp
- vxor $in0,$in0,$key
- vxor $in1,$in1,$key
- vsldoi $stage,$stage,$in0,8
-
- vperm $key,$in1,$in1,$mask # rotate-n-splat
- vsldoi $tmp,$zero,$in0,12 # >>32
- vperm $outtail,$stage,$stage,$outperm # rotate
- vsel $stage,$outhead,$outtail,$outmask
- vmr $outhead,$outtail
- vcipherlast $key,$key,$rcon
- stvx $stage,0,$out
- addi $out,$out,16
-
- vsldoi $stage,$in0,$in1,8
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vperm $outtail,$stage,$stage,$outperm # rotate
- vsel $stage,$outhead,$outtail,$outmask
- vmr $outhead,$outtail
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in0,$in0,$tmp
- stvx $stage,0,$out
- addi $out,$out,16
-
- vspltw $tmp,$in0,3
- vxor $tmp,$tmp,$in1
- vsldoi $in1,$zero,$in1,12 # >>32
- vadduwm $rcon,$rcon,$rcon
- vxor $in1,$in1,$tmp
- vxor $in0,$in0,$key
- vxor $in1,$in1,$key
- vperm $outtail,$in0,$in0,$outperm # rotate
- vsel $stage,$outhead,$outtail,$outmask
- vmr $outhead,$outtail
- stvx $stage,0,$out
- addi $inp,$out,15 # 15 is not typo
- addi $out,$out,16
- bdnz Loop192
-
- li $rounds,12
- addi $out,$out,0x20
- b Ldone
-
-.align 4
-L256:
- lvx $tmp,0,$inp
- li $cnt,7
- li $rounds,14
- vperm $outtail,$in0,$in0,$outperm # rotate
- vsel $stage,$outhead,$outtail,$outmask
- vmr $outhead,$outtail
- stvx $stage,0,$out
- addi $out,$out,16
- vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
- mtctr $cnt
-
-Loop256:
- vperm $key,$in1,$in1,$mask # rotate-n-splat
- vsldoi $tmp,$zero,$in0,12 # >>32
- vperm $outtail,$in1,$in1,$outperm # rotate
- vsel $stage,$outhead,$outtail,$outmask
- vmr $outhead,$outtail
- vcipherlast $key,$key,$rcon
- stvx $stage,0,$out
- addi $out,$out,16
-
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in0,$in0,$tmp
- vadduwm $rcon,$rcon,$rcon
- vxor $in0,$in0,$key
- vperm $outtail,$in0,$in0,$outperm # rotate
- vsel $stage,$outhead,$outtail,$outmask
- vmr $outhead,$outtail
- stvx $stage,0,$out
- addi $inp,$out,15 # 15 is not typo
- addi $out,$out,16
- bdz Ldone
-
- vspltw $key,$in0,3 # just splat
- vsldoi $tmp,$zero,$in1,12 # >>32
- vsbox $key,$key
-
- vxor $in1,$in1,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in1,$in1,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in1,$in1,$tmp
-
- vxor $in1,$in1,$key
- b Loop256
-
-.align 4
-Ldone:
- lvx $in1,0,$inp # redundant in aligned case
- vsel $in1,$outhead,$in1,$outmask
- stvx $in1,0,$inp
- li $ptr,0
- mtspr 256,$vrsave
- stw $rounds,0($out)
-
-Lenc_key_abort:
- mr r3,$ptr
- blr
- .long 0
- .byte 0,12,0x14,1,0,0,3,0
- .long 0
-.size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key
-
-.globl .${prefix}_set_decrypt_key
-.align 5
-.${prefix}_set_decrypt_key:
- $STU $sp,-$FRAME($sp)
- mflr r10
- $PUSH r10,`$FRAME+$LRSAVE`($sp)
- bl Lset_encrypt_key
- mtlr r10
-
- cmpwi r3,0
- bne- Ldec_key_abort
-
- slwi $cnt,$rounds,4
- subi $inp,$out,240 # first round key
- srwi $rounds,$rounds,1
- add $out,$inp,$cnt # last round key
- mtctr $rounds
-
-Ldeckey:
- lwz r0, 0($inp)
- lwz r6, 4($inp)
- lwz r7, 8($inp)
- lwz r8, 12($inp)
- addi $inp,$inp,16
- lwz r9, 0($out)
- lwz r10,4($out)
- lwz r11,8($out)
- lwz r12,12($out)
- stw r0, 0($out)
- stw r6, 4($out)
- stw r7, 8($out)
- stw r8, 12($out)
- subi $out,$out,16
- stw r9, -16($inp)
- stw r10,-12($inp)
- stw r11,-8($inp)
- stw r12,-4($inp)
- bdnz Ldeckey
-
- xor r3,r3,r3 # return value
-Ldec_key_abort:
- addi $sp,$sp,$FRAME
- blr
- .long 0
- .byte 0,12,4,1,0x80,0,3,0
- .long 0
-.size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
-___
-}}}
-#########################################################################
-{{{ # Single block en- and decrypt procedures #
-sub gen_block () {
-my $dir = shift;
-my $n = $dir eq "de" ? "n" : "";
-my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
-
-$code.=<<___;
-.globl .${prefix}_${dir}crypt
-.align 5
-.${prefix}_${dir}crypt:
- lwz $rounds,240($key)
- lis r0,0xfc00
- mfspr $vrsave,256
- li $idx,15 # 15 is not typo
- mtspr 256,r0
-
- lvx v0,0,$inp
- neg r11,$out
- lvx v1,$idx,$inp
- lvsl v2,0,$inp # inpperm
- le?vspltisb v4,0x0f
- ?lvsl v3,0,r11 # outperm
- le?vxor v2,v2,v4
- li $idx,16
- vperm v0,v0,v1,v2 # align [and byte swap in LE]
- lvx v1,0,$key
- ?lvsl v5,0,$key # keyperm
- srwi $rounds,$rounds,1
- lvx v2,$idx,$key
- addi $idx,$idx,16
- subi $rounds,$rounds,1
- ?vperm v1,v1,v2,v5 # align round key
-
- vxor v0,v0,v1
- lvx v1,$idx,$key
- addi $idx,$idx,16
- mtctr $rounds
-
-Loop_${dir}c:
- ?vperm v2,v2,v1,v5
- v${n}cipher v0,v0,v2
- lvx v2,$idx,$key
- addi $idx,$idx,16
- ?vperm v1,v1,v2,v5
- v${n}cipher v0,v0,v1
- lvx v1,$idx,$key
- addi $idx,$idx,16
- bdnz Loop_${dir}c
-
- ?vperm v2,v2,v1,v5
- v${n}cipher v0,v0,v2
- lvx v2,$idx,$key
- ?vperm v1,v1,v2,v5
- v${n}cipherlast v0,v0,v1
-
- vspltisb v2,-1
- vxor v1,v1,v1
- li $idx,15 # 15 is not typo
- ?vperm v2,v1,v2,v3 # outmask
- le?vxor v3,v3,v4
- lvx v1,0,$out # outhead
- vperm v0,v0,v0,v3 # rotate [and byte swap in LE]
- vsel v1,v1,v0,v2
- lvx v4,$idx,$out
- stvx v1,0,$out
- vsel v0,v0,v4,v2
- stvx v0,$idx,$out
-
- mtspr 256,$vrsave
- blr
- .long 0
- .byte 0,12,0x14,0,0,0,3,0
- .long 0
-.size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
-___
-}
-&gen_block("en");
-&gen_block("de");
-}}}
-#########################################################################
-{{{ # CBC en- and decrypt procedures #
-my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
-my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
-my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
- map("v$_",(4..10));
-$code.=<<___;
-.globl .${prefix}_cbc_encrypt
-.align 5
-.${prefix}_cbc_encrypt:
- ${UCMP}i $len,16
- bltlr-
-
- cmpwi $enc,0 # test direction
- lis r0,0xffe0
- mfspr $vrsave,256
- mtspr 256,r0
-
- li $idx,15
- vxor $rndkey0,$rndkey0,$rndkey0
- le?vspltisb $tmp,0x0f
-
- lvx $ivec,0,$ivp # load [unaligned] iv
- lvsl $inpperm,0,$ivp
- lvx $inptail,$idx,$ivp
- le?vxor $inpperm,$inpperm,$tmp
- vperm $ivec,$ivec,$inptail,$inpperm
-
- neg r11,$inp
- ?lvsl $keyperm,0,$key # prepare for unaligned key
- lwz $rounds,240($key)
-
- lvsr $inpperm,0,r11 # prepare for unaligned load
- lvx $inptail,0,$inp
- addi $inp,$inp,15 # 15 is not typo
- le?vxor $inpperm,$inpperm,$tmp
-
- ?lvsr $outperm,0,$out # prepare for unaligned store
- vspltisb $outmask,-1
- lvx $outhead,0,$out
- ?vperm $outmask,$rndkey0,$outmask,$outperm
- le?vxor $outperm,$outperm,$tmp
-
- srwi $rounds,$rounds,1
- li $idx,16
- subi $rounds,$rounds,1
- beq Lcbc_dec
-
-Lcbc_enc:
- vmr $inout,$inptail
- lvx $inptail,0,$inp
- addi $inp,$inp,16
- mtctr $rounds
- subi $len,$len,16 # len-=16
-
- lvx $rndkey0,0,$key
- vperm $inout,$inout,$inptail,$inpperm
- lvx $rndkey1,$idx,$key
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key
- addi $idx,$idx,16
- vxor $inout,$inout,$ivec
-
-Loop_cbc_enc:
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vcipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vcipher $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key
- addi $idx,$idx,16
- bdnz Loop_cbc_enc
-
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vcipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key
- li $idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vcipherlast $ivec,$inout,$rndkey0
- ${UCMP}i $len,16
-
- vperm $tmp,$ivec,$ivec,$outperm
- vsel $inout,$outhead,$tmp,$outmask
- vmr $outhead,$tmp
- stvx $inout,0,$out
- addi $out,$out,16
- bge Lcbc_enc
-
- b Lcbc_done
-
-.align 4
-Lcbc_dec:
- ${UCMP}i $len,128
- bge _aesp8_cbc_decrypt8x
- vmr $tmp,$inptail
- lvx $inptail,0,$inp
- addi $inp,$inp,16
- mtctr $rounds
- subi $len,$len,16 # len-=16
-
- lvx $rndkey0,0,$key
- vperm $tmp,$tmp,$inptail,$inpperm
- lvx $rndkey1,$idx,$key
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $inout,$tmp,$rndkey0
- lvx $rndkey0,$idx,$key
- addi $idx,$idx,16
-
-Loop_cbc_dec:
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vncipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vncipher $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key
- addi $idx,$idx,16
- bdnz Loop_cbc_dec
-
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vncipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key
- li $idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vncipherlast $inout,$inout,$rndkey0
- ${UCMP}i $len,16
-
- vxor $inout,$inout,$ivec
- vmr $ivec,$tmp
- vperm $tmp,$inout,$inout,$outperm
- vsel $inout,$outhead,$tmp,$outmask
- vmr $outhead,$tmp
- stvx $inout,0,$out
- addi $out,$out,16
- bge Lcbc_dec
-
-Lcbc_done:
- addi $out,$out,-1
- lvx $inout,0,$out # redundant in aligned case
- vsel $inout,$outhead,$inout,$outmask
- stvx $inout,0,$out
-
- neg $enc,$ivp # write [unaligned] iv
- li $idx,15 # 15 is not typo
- vxor $rndkey0,$rndkey0,$rndkey0
- vspltisb $outmask,-1
- le?vspltisb $tmp,0x0f
- ?lvsl $outperm,0,$enc
- ?vperm $outmask,$rndkey0,$outmask,$outperm
- le?vxor $outperm,$outperm,$tmp
- lvx $outhead,0,$ivp
- vperm $ivec,$ivec,$ivec,$outperm
- vsel $inout,$outhead,$ivec,$outmask
- lvx $inptail,$idx,$ivp
- stvx $inout,0,$ivp
- vsel $inout,$ivec,$inptail,$outmask
- stvx $inout,$idx,$ivp
-
- mtspr 256,$vrsave
- blr
- .long 0
- .byte 0,12,0x14,0,0,0,6,0
- .long 0
-___
-#########################################################################
-{{ # Optimized CBC decrypt procedure #
-my $key_="r11";
-my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
- $x00=0 if ($flavour =~ /osx/);
-my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
-my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
-my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
- # v26-v31 last 6 round keys
-my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
-
-$code.=<<___;
-.align 5
-_aesp8_cbc_decrypt8x:
- $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
- li r10,`$FRAME+8*16+15`
- li r11,`$FRAME+8*16+31`
- stvx v20,r10,$sp # ABI says so
- addi r10,r10,32
- stvx v21,r11,$sp
- addi r11,r11,32
- stvx v22,r10,$sp
- addi r10,r10,32
- stvx v23,r11,$sp
- addi r11,r11,32
- stvx v24,r10,$sp
- addi r10,r10,32
- stvx v25,r11,$sp
- addi r11,r11,32
- stvx v26,r10,$sp
- addi r10,r10,32
- stvx v27,r11,$sp
- addi r11,r11,32
- stvx v28,r10,$sp
- addi r10,r10,32
- stvx v29,r11,$sp
- addi r11,r11,32
- stvx v30,r10,$sp
- stvx v31,r11,$sp
- li r0,-1
- stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
- li $x10,0x10
- $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
- li $x20,0x20
- $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
- li $x30,0x30
- $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
- li $x40,0x40
- $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
- li $x50,0x50
- $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
- li $x60,0x60
- $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
- li $x70,0x70
- mtspr 256,r0
-
- subi $rounds,$rounds,3 # -4 in total
- subi $len,$len,128 # bias
-
- lvx $rndkey0,$x00,$key # load key schedule
- lvx v30,$x10,$key
- addi $key,$key,0x20
- lvx v31,$x00,$key
- ?vperm $rndkey0,$rndkey0,v30,$keyperm
- addi $key_,$sp,`$FRAME+15`
- mtctr $rounds
-
-Load_cbc_dec_key:
- ?vperm v24,v30,v31,$keyperm
- lvx v30,$x10,$key
- addi $key,$key,0x20
- stvx v24,$x00,$key_ # off-load round[1]
- ?vperm v25,v31,v30,$keyperm
- lvx v31,$x00,$key
- stvx v25,$x10,$key_ # off-load round[2]
- addi $key_,$key_,0x20
- bdnz Load_cbc_dec_key
-
- lvx v26,$x10,$key
- ?vperm v24,v30,v31,$keyperm
- lvx v27,$x20,$key
- stvx v24,$x00,$key_ # off-load round[3]
- ?vperm v25,v31,v26,$keyperm
- lvx v28,$x30,$key
- stvx v25,$x10,$key_ # off-load round[4]
- addi $key_,$sp,`$FRAME+15` # rewind $key_
- ?vperm v26,v26,v27,$keyperm
- lvx v29,$x40,$key
- ?vperm v27,v27,v28,$keyperm
- lvx v30,$x50,$key
- ?vperm v28,v28,v29,$keyperm
- lvx v31,$x60,$key
- ?vperm v29,v29,v30,$keyperm
- lvx $out0,$x70,$key # borrow $out0
- ?vperm v30,v30,v31,$keyperm
- lvx v24,$x00,$key_ # pre-load round[1]
- ?vperm v31,v31,$out0,$keyperm
- lvx v25,$x10,$key_ # pre-load round[2]
-
- #lvx $inptail,0,$inp # "caller" already did this
- #addi $inp,$inp,15 # 15 is not typo
- subi $inp,$inp,15 # undo "caller"
-
- le?li $idx,8
- lvx_u $in0,$x00,$inp # load first 8 "words"
- le?lvsl $inpperm,0,$idx
- le?vspltisb $tmp,0x0f
- lvx_u $in1,$x10,$inp
- le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
- lvx_u $in2,$x20,$inp
- le?vperm $in0,$in0,$in0,$inpperm
- lvx_u $in3,$x30,$inp
- le?vperm $in1,$in1,$in1,$inpperm
- lvx_u $in4,$x40,$inp
- le?vperm $in2,$in2,$in2,$inpperm
- vxor $out0,$in0,$rndkey0
- lvx_u $in5,$x50,$inp
- le?vperm $in3,$in3,$in3,$inpperm
- vxor $out1,$in1,$rndkey0
- lvx_u $in6,$x60,$inp
- le?vperm $in4,$in4,$in4,$inpperm
- vxor $out2,$in2,$rndkey0
- lvx_u $in7,$x70,$inp
- addi $inp,$inp,0x80
- le?vperm $in5,$in5,$in5,$inpperm
- vxor $out3,$in3,$rndkey0
- le?vperm $in6,$in6,$in6,$inpperm
- vxor $out4,$in4,$rndkey0
- le?vperm $in7,$in7,$in7,$inpperm
- vxor $out5,$in5,$rndkey0
- vxor $out6,$in6,$rndkey0
- vxor $out7,$in7,$rndkey0
-
- mtctr $rounds
- b Loop_cbc_dec8x
-.align 5
-Loop_cbc_dec8x:
- vncipher $out0,$out0,v24
- vncipher $out1,$out1,v24
- vncipher $out2,$out2,v24
- vncipher $out3,$out3,v24
- vncipher $out4,$out4,v24
- vncipher $out5,$out5,v24
- vncipher $out6,$out6,v24
- vncipher $out7,$out7,v24
- lvx v24,$x20,$key_ # round[3]
- addi $key_,$key_,0x20
-
- vncipher $out0,$out0,v25
- vncipher $out1,$out1,v25
- vncipher $out2,$out2,v25
- vncipher $out3,$out3,v25
- vncipher $out4,$out4,v25
- vncipher $out5,$out5,v25
- vncipher $out6,$out6,v25
- vncipher $out7,$out7,v25
- lvx v25,$x10,$key_ # round[4]
- bdnz Loop_cbc_dec8x
-
- subic $len,$len,128 # $len-=128
- vncipher $out0,$out0,v24
- vncipher $out1,$out1,v24
- vncipher $out2,$out2,v24
- vncipher $out3,$out3,v24
- vncipher $out4,$out4,v24
- vncipher $out5,$out5,v24
- vncipher $out6,$out6,v24
- vncipher $out7,$out7,v24
-
- subfe. r0,r0,r0 # borrow?-1:0
- vncipher $out0,$out0,v25
- vncipher $out1,$out1,v25
- vncipher $out2,$out2,v25
- vncipher $out3,$out3,v25
- vncipher $out4,$out4,v25
- vncipher $out5,$out5,v25
- vncipher $out6,$out6,v25
- vncipher $out7,$out7,v25
-
- and r0,r0,$len
- vncipher $out0,$out0,v26
- vncipher $out1,$out1,v26
- vncipher $out2,$out2,v26
- vncipher $out3,$out3,v26
- vncipher $out4,$out4,v26
- vncipher $out5,$out5,v26
- vncipher $out6,$out6,v26
- vncipher $out7,$out7,v26
-
- add $inp,$inp,r0 # $inp is adjusted in such
- # way that at exit from the
- # loop inX-in7 are loaded
- # with last "words"
- vncipher $out0,$out0,v27
- vncipher $out1,$out1,v27
- vncipher $out2,$out2,v27
- vncipher $out3,$out3,v27
- vncipher $out4,$out4,v27
- vncipher $out5,$out5,v27
- vncipher $out6,$out6,v27
- vncipher $out7,$out7,v27
-
- addi $key_,$sp,`$FRAME+15` # rewind $key_
- vncipher $out0,$out0,v28
- vncipher $out1,$out1,v28
- vncipher $out2,$out2,v28
- vncipher $out3,$out3,v28
- vncipher $out4,$out4,v28
- vncipher $out5,$out5,v28
- vncipher $out6,$out6,v28
- vncipher $out7,$out7,v28
- lvx v24,$x00,$key_ # re-pre-load round[1]
-
- vncipher $out0,$out0,v29
- vncipher $out1,$out1,v29
- vncipher $out2,$out2,v29
- vncipher $out3,$out3,v29
- vncipher $out4,$out4,v29
- vncipher $out5,$out5,v29
- vncipher $out6,$out6,v29
- vncipher $out7,$out7,v29
- lvx v25,$x10,$key_ # re-pre-load round[2]
-
- vncipher $out0,$out0,v30
- vxor $ivec,$ivec,v31 # xor with last round key
- vncipher $out1,$out1,v30
- vxor $in0,$in0,v31
- vncipher $out2,$out2,v30
- vxor $in1,$in1,v31
- vncipher $out3,$out3,v30
- vxor $in2,$in2,v31
- vncipher $out4,$out4,v30
- vxor $in3,$in3,v31
- vncipher $out5,$out5,v30
- vxor $in4,$in4,v31
- vncipher $out6,$out6,v30
- vxor $in5,$in5,v31
- vncipher $out7,$out7,v30
- vxor $in6,$in6,v31
-
- vncipherlast $out0,$out0,$ivec
- vncipherlast $out1,$out1,$in0
- lvx_u $in0,$x00,$inp # load next input block
- vncipherlast $out2,$out2,$in1
- lvx_u $in1,$x10,$inp
- vncipherlast $out3,$out3,$in2
- le?vperm $in0,$in0,$in0,$inpperm
- lvx_u $in2,$x20,$inp
- vncipherlast $out4,$out4,$in3
- le?vperm $in1,$in1,$in1,$inpperm
- lvx_u $in3,$x30,$inp
- vncipherlast $out5,$out5,$in4
- le?vperm $in2,$in2,$in2,$inpperm
- lvx_u $in4,$x40,$inp
- vncipherlast $out6,$out6,$in5
- le?vperm $in3,$in3,$in3,$inpperm
- lvx_u $in5,$x50,$inp
- vncipherlast $out7,$out7,$in6
- le?vperm $in4,$in4,$in4,$inpperm
- lvx_u $in6,$x60,$inp
- vmr $ivec,$in7
- le?vperm $in5,$in5,$in5,$inpperm
- lvx_u $in7,$x70,$inp
- addi $inp,$inp,0x80
-
- le?vperm $out0,$out0,$out0,$inpperm
- le?vperm $out1,$out1,$out1,$inpperm
- stvx_u $out0,$x00,$out
- le?vperm $in6,$in6,$in6,$inpperm
- vxor $out0,$in0,$rndkey0
- le?vperm $out2,$out2,$out2,$inpperm
- stvx_u $out1,$x10,$out
- le?vperm $in7,$in7,$in7,$inpperm
- vxor $out1,$in1,$rndkey0
- le?vperm $out3,$out3,$out3,$inpperm
- stvx_u $out2,$x20,$out
- vxor $out2,$in2,$rndkey0
- le?vperm $out4,$out4,$out4,$inpperm
- stvx_u $out3,$x30,$out
- vxor $out3,$in3,$rndkey0
- le?vperm $out5,$out5,$out5,$inpperm
- stvx_u $out4,$x40,$out
- vxor $out4,$in4,$rndkey0
- le?vperm $out6,$out6,$out6,$inpperm
- stvx_u $out5,$x50,$out
- vxor $out5,$in5,$rndkey0
- le?vperm $out7,$out7,$out7,$inpperm
- stvx_u $out6,$x60,$out
- vxor $out6,$in6,$rndkey0
- stvx_u $out7,$x70,$out
- addi $out,$out,0x80
- vxor $out7,$in7,$rndkey0
-
- mtctr $rounds
- beq Loop_cbc_dec8x # did $len-=128 borrow?
-
- addic. $len,$len,128
- beq Lcbc_dec8x_done
- nop
- nop
-
-Loop_cbc_dec8x_tail: # up to 7 "words" tail...
- vncipher $out1,$out1,v24
- vncipher $out2,$out2,v24
- vncipher $out3,$out3,v24
- vncipher $out4,$out4,v24
- vncipher $out5,$out5,v24
- vncipher $out6,$out6,v24
- vncipher $out7,$out7,v24
- lvx v24,$x20,$key_ # round[3]
- addi $key_,$key_,0x20
-
- vncipher $out1,$out1,v25
- vncipher $out2,$out2,v25
- vncipher $out3,$out3,v25
- vncipher $out4,$out4,v25
- vncipher $out5,$out5,v25
- vncipher $out6,$out6,v25
- vncipher $out7,$out7,v25
- lvx v25,$x10,$key_ # round[4]
- bdnz Loop_cbc_dec8x_tail
-
- vncipher $out1,$out1,v24
- vncipher $out2,$out2,v24
- vncipher $out3,$out3,v24
- vncipher $out4,$out4,v24
- vncipher $out5,$out5,v24
- vncipher $out6,$out6,v24
- vncipher $out7,$out7,v24
-
- vncipher $out1,$out1,v25
- vncipher $out2,$out2,v25
- vncipher $out3,$out3,v25
- vncipher $out4,$out4,v25
- vncipher $out5,$out5,v25
- vncipher $out6,$out6,v25
- vncipher $out7,$out7,v25
-
- vncipher $out1,$out1,v26
- vncipher $out2,$out2,v26
- vncipher $out3,$out3,v26
- vncipher $out4,$out4,v26
- vncipher $out5,$out5,v26
- vncipher $out6,$out6,v26
- vncipher $out7,$out7,v26
-
- vncipher $out1,$out1,v27
- vncipher $out2,$out2,v27
- vncipher $out3,$out3,v27
- vncipher $out4,$out4,v27
- vncipher $out5,$out5,v27
- vncipher $out6,$out6,v27
- vncipher $out7,$out7,v27
-
- vncipher $out1,$out1,v28
- vncipher $out2,$out2,v28
- vncipher $out3,$out3,v28
- vncipher $out4,$out4,v28
- vncipher $out5,$out5,v28
- vncipher $out6,$out6,v28
- vncipher $out7,$out7,v28
-
- vncipher $out1,$out1,v29
- vncipher $out2,$out2,v29
- vncipher $out3,$out3,v29
- vncipher $out4,$out4,v29
- vncipher $out5,$out5,v29
- vncipher $out6,$out6,v29
- vncipher $out7,$out7,v29
-
- vncipher $out1,$out1,v30
- vxor $ivec,$ivec,v31 # last round key
- vncipher $out2,$out2,v30
- vxor $in1,$in1,v31
- vncipher $out3,$out3,v30
- vxor $in2,$in2,v31
- vncipher $out4,$out4,v30
- vxor $in3,$in3,v31
- vncipher $out5,$out5,v30
- vxor $in4,$in4,v31
- vncipher $out6,$out6,v30
- vxor $in5,$in5,v31
- vncipher $out7,$out7,v30
- vxor $in6,$in6,v31
-
- cmplwi $len,32 # switch($len)
- blt Lcbc_dec8x_one
- nop
- beq Lcbc_dec8x_two
- cmplwi $len,64
- blt Lcbc_dec8x_three
- nop
- beq Lcbc_dec8x_four
- cmplwi $len,96
- blt Lcbc_dec8x_five
- nop
- beq Lcbc_dec8x_six
-
-Lcbc_dec8x_seven:
- vncipherlast $out1,$out1,$ivec
- vncipherlast $out2,$out2,$in1
- vncipherlast $out3,$out3,$in2
- vncipherlast $out4,$out4,$in3
- vncipherlast $out5,$out5,$in4
- vncipherlast $out6,$out6,$in5
- vncipherlast $out7,$out7,$in6
- vmr $ivec,$in7
-
- le?vperm $out1,$out1,$out1,$inpperm
- le?vperm $out2,$out2,$out2,$inpperm
- stvx_u $out1,$x00,$out
- le?vperm $out3,$out3,$out3,$inpperm
- stvx_u $out2,$x10,$out
- le?vperm $out4,$out4,$out4,$inpperm
- stvx_u $out3,$x20,$out
- le?vperm $out5,$out5,$out5,$inpperm
- stvx_u $out4,$x30,$out
- le?vperm $out6,$out6,$out6,$inpperm
- stvx_u $out5,$x40,$out
- le?vperm $out7,$out7,$out7,$inpperm
- stvx_u $out6,$x50,$out
- stvx_u $out7,$x60,$out
- addi $out,$out,0x70
- b Lcbc_dec8x_done
-
-.align 5
-Lcbc_dec8x_six:
- vncipherlast $out2,$out2,$ivec
- vncipherlast $out3,$out3,$in2
- vncipherlast $out4,$out4,$in3
- vncipherlast $out5,$out5,$in4
- vncipherlast $out6,$out6,$in5
- vncipherlast $out7,$out7,$in6
- vmr $ivec,$in7
-
- le?vperm $out2,$out2,$out2,$inpperm
- le?vperm $out3,$out3,$out3,$inpperm
- stvx_u $out2,$x00,$out
- le?vperm $out4,$out4,$out4,$inpperm
- stvx_u $out3,$x10,$out
- le?vperm $out5,$out5,$out5,$inpperm
- stvx_u $out4,$x20,$out
- le?vperm $out6,$out6,$out6,$inpperm
- stvx_u $out5,$x30,$out
- le?vperm $out7,$out7,$out7,$inpperm
- stvx_u $out6,$x40,$out
- stvx_u $out7,$x50,$out
- addi $out,$out,0x60
- b Lcbc_dec8x_done
-
-.align 5
-Lcbc_dec8x_five:
- vncipherlast $out3,$out3,$ivec
- vncipherlast $out4,$out4,$in3
- vncipherlast $out5,$out5,$in4
- vncipherlast $out6,$out6,$in5
- vncipherlast $out7,$out7,$in6
- vmr $ivec,$in7
-
- le?vperm $out3,$out3,$out3,$inpperm
- le?vperm $out4,$out4,$out4,$inpperm
- stvx_u $out3,$x00,$out
- le?vperm $out5,$out5,$out5,$inpperm
- stvx_u $out4,$x10,$out
- le?vperm $out6,$out6,$out6,$inpperm
- stvx_u $out5,$x20,$out
- le?vperm $out7,$out7,$out7,$inpperm
- stvx_u $out6,$x30,$out
- stvx_u $out7,$x40,$out
- addi $out,$out,0x50
- b Lcbc_dec8x_done
-
-.align 5
-Lcbc_dec8x_four:
- vncipherlast $out4,$out4,$ivec
- vncipherlast $out5,$out5,$in4
- vncipherlast $out6,$out6,$in5
- vncipherlast $out7,$out7,$in6
- vmr $ivec,$in7
-
- le?vperm $out4,$out4,$out4,$inpperm
- le?vperm $out5,$out5,$out5,$inpperm
- stvx_u $out4,$x00,$out
- le?vperm $out6,$out6,$out6,$inpperm
- stvx_u $out5,$x10,$out
- le?vperm $out7,$out7,$out7,$inpperm
- stvx_u $out6,$x20,$out
- stvx_u $out7,$x30,$out
- addi $out,$out,0x40
- b Lcbc_dec8x_done
-
-.align 5
-Lcbc_dec8x_three:
- vncipherlast $out5,$out5,$ivec
- vncipherlast $out6,$out6,$in5
- vncipherlast $out7,$out7,$in6
- vmr $ivec,$in7
-
- le?vperm $out5,$out5,$out5,$inpperm
- le?vperm $out6,$out6,$out6,$inpperm
- stvx_u $out5,$x00,$out
- le?vperm $out7,$out7,$out7,$inpperm
- stvx_u $out6,$x10,$out
- stvx_u $out7,$x20,$out
- addi $out,$out,0x30
- b Lcbc_dec8x_done
-
-.align 5
-Lcbc_dec8x_two:
- vncipherlast $out6,$out6,$ivec
- vncipherlast $out7,$out7,$in6
- vmr $ivec,$in7
-
- le?vperm $out6,$out6,$out6,$inpperm
- le?vperm $out7,$out7,$out7,$inpperm
- stvx_u $out6,$x00,$out
- stvx_u $out7,$x10,$out
- addi $out,$out,0x20
- b Lcbc_dec8x_done
-
-.align 5
-Lcbc_dec8x_one:
- vncipherlast $out7,$out7,$ivec
- vmr $ivec,$in7
-
- le?vperm $out7,$out7,$out7,$inpperm
- stvx_u $out7,0,$out
- addi $out,$out,0x10
-
-Lcbc_dec8x_done:
- le?vperm $ivec,$ivec,$ivec,$inpperm
- stvx_u $ivec,0,$ivp # write [unaligned] iv
-
- li r10,`$FRAME+15`
- li r11,`$FRAME+31`
- stvx $inpperm,r10,$sp # wipe copies of round keys
- addi r10,r10,32
- stvx $inpperm,r11,$sp
- addi r11,r11,32
- stvx $inpperm,r10,$sp
- addi r10,r10,32
- stvx $inpperm,r11,$sp
- addi r11,r11,32
- stvx $inpperm,r10,$sp
- addi r10,r10,32
- stvx $inpperm,r11,$sp
- addi r11,r11,32
- stvx $inpperm,r10,$sp
- addi r10,r10,32
- stvx $inpperm,r11,$sp
- addi r11,r11,32
-
- mtspr 256,$vrsave
- lvx v20,r10,$sp # ABI says so
- addi r10,r10,32
- lvx v21,r11,$sp
- addi r11,r11,32
- lvx v22,r10,$sp
- addi r10,r10,32
- lvx v23,r11,$sp
- addi r11,r11,32
- lvx v24,r10,$sp
- addi r10,r10,32
- lvx v25,r11,$sp
- addi r11,r11,32
- lvx v26,r10,$sp
- addi r10,r10,32
- lvx v27,r11,$sp
- addi r11,r11,32
- lvx v28,r10,$sp
- addi r10,r10,32
- lvx v29,r11,$sp
- addi r11,r11,32
- lvx v30,r10,$sp
- lvx v31,r11,$sp
- $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
- $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
- $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
- $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
- $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
- $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
- addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
- blr
- .long 0
- .byte 0,12,0x04,0,0x80,6,6,0
- .long 0
-.size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
-___
-}} }}}
-
-#########################################################################
-{{{ # CTR procedure[s] #
-my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
-my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
-my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
- map("v$_",(4..11));
-my $dat=$tmp;
-
-$code.=<<___;
-.globl .${prefix}_ctr32_encrypt_blocks
-.align 5
-.${prefix}_ctr32_encrypt_blocks:
- ${UCMP}i $len,1
- bltlr-
-
- lis r0,0xfff0
- mfspr $vrsave,256
- mtspr 256,r0
-
- li $idx,15
- vxor $rndkey0,$rndkey0,$rndkey0
- le?vspltisb $tmp,0x0f
-
- lvx $ivec,0,$ivp # load [unaligned] iv
- lvsl $inpperm,0,$ivp
- lvx $inptail,$idx,$ivp
- vspltisb $one,1
- le?vxor $inpperm,$inpperm,$tmp
- vperm $ivec,$ivec,$inptail,$inpperm
- vsldoi $one,$rndkey0,$one,1
-
- neg r11,$inp
- ?lvsl $keyperm,0,$key # prepare for unaligned key
- lwz $rounds,240($key)
-
- lvsr $inpperm,0,r11 # prepare for unaligned load
- lvx $inptail,0,$inp
- addi $inp,$inp,15 # 15 is not typo
- le?vxor $inpperm,$inpperm,$tmp
-
- srwi $rounds,$rounds,1
- li $idx,16
- subi $rounds,$rounds,1
-
- ${UCMP}i $len,8
- bge _aesp8_ctr32_encrypt8x
-
- ?lvsr $outperm,0,$out # prepare for unaligned store
- vspltisb $outmask,-1
- lvx $outhead,0,$out
- ?vperm $outmask,$rndkey0,$outmask,$outperm
- le?vxor $outperm,$outperm,$tmp
-
- lvx $rndkey0,0,$key
- mtctr $rounds
- lvx $rndkey1,$idx,$key
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $inout,$ivec,$rndkey0
- lvx $rndkey0,$idx,$key
- addi $idx,$idx,16
- b Loop_ctr32_enc
-
-.align 5
-Loop_ctr32_enc:
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vcipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vcipher $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key
- addi $idx,$idx,16
- bdnz Loop_ctr32_enc
-
- vadduwm $ivec,$ivec,$one
- vmr $dat,$inptail
- lvx $inptail,0,$inp
- addi $inp,$inp,16
- subic. $len,$len,1 # blocks--
-
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vcipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key
- vperm $dat,$dat,$inptail,$inpperm
- li $idx,16
- ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm
- lvx $rndkey0,0,$key
- vxor $dat,$dat,$rndkey1 # last round key
- vcipherlast $inout,$inout,$dat
-
- lvx $rndkey1,$idx,$key
- addi $idx,$idx,16
- vperm $inout,$inout,$inout,$outperm
- vsel $dat,$outhead,$inout,$outmask
- mtctr $rounds
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vmr $outhead,$inout
- vxor $inout,$ivec,$rndkey0
- lvx $rndkey0,$idx,$key
- addi $idx,$idx,16
- stvx $dat,0,$out
- addi $out,$out,16
- bne Loop_ctr32_enc
-
- addi $out,$out,-1
- lvx $inout,0,$out # redundant in aligned case
- vsel $inout,$outhead,$inout,$outmask
- stvx $inout,0,$out
-
- mtspr 256,$vrsave
- blr
- .long 0
- .byte 0,12,0x14,0,0,0,6,0
- .long 0
-___
-#########################################################################
-{{ # Optimized CTR procedure #
-my $key_="r11";
-my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
- $x00=0 if ($flavour =~ /osx/);
-my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
-my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
-my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
- # v26-v31 last 6 round keys
-my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
-my ($two,$three,$four)=($outhead,$outperm,$outmask);
-
-$code.=<<___;
-.align 5
-_aesp8_ctr32_encrypt8x:
- $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
- li r10,`$FRAME+8*16+15`
- li r11,`$FRAME+8*16+31`
- stvx v20,r10,$sp # ABI says so
- addi r10,r10,32
- stvx v21,r11,$sp
- addi r11,r11,32
- stvx v22,r10,$sp
- addi r10,r10,32
- stvx v23,r11,$sp
- addi r11,r11,32
- stvx v24,r10,$sp
- addi r10,r10,32
- stvx v25,r11,$sp
- addi r11,r11,32
- stvx v26,r10,$sp
- addi r10,r10,32
- stvx v27,r11,$sp
- addi r11,r11,32
- stvx v28,r10,$sp
- addi r10,r10,32
- stvx v29,r11,$sp
- addi r11,r11,32
- stvx v30,r10,$sp
- stvx v31,r11,$sp
- li r0,-1
- stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
- li $x10,0x10
- $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
- li $x20,0x20
- $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
- li $x30,0x30
- $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
- li $x40,0x40
- $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
- li $x50,0x50
- $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
- li $x60,0x60
- $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
- li $x70,0x70
- mtspr 256,r0
-
- subi $rounds,$rounds,3 # -4 in total
-
- lvx $rndkey0,$x00,$key # load key schedule
- lvx v30,$x10,$key
- addi $key,$key,0x20
- lvx v31,$x00,$key
- ?vperm $rndkey0,$rndkey0,v30,$keyperm
- addi $key_,$sp,`$FRAME+15`
- mtctr $rounds
-
-Load_ctr32_enc_key:
- ?vperm v24,v30,v31,$keyperm
- lvx v30,$x10,$key
- addi $key,$key,0x20
- stvx v24,$x00,$key_ # off-load round[1]
- ?vperm v25,v31,v30,$keyperm
- lvx v31,$x00,$key
- stvx v25,$x10,$key_ # off-load round[2]
- addi $key_,$key_,0x20
- bdnz Load_ctr32_enc_key
-
- lvx v26,$x10,$key
- ?vperm v24,v30,v31,$keyperm
- lvx v27,$x20,$key
- stvx v24,$x00,$key_ # off-load round[3]
- ?vperm v25,v31,v26,$keyperm
- lvx v28,$x30,$key
- stvx v25,$x10,$key_ # off-load round[4]
- addi $key_,$sp,`$FRAME+15` # rewind $key_
- ?vperm v26,v26,v27,$keyperm
- lvx v29,$x40,$key
- ?vperm v27,v27,v28,$keyperm
- lvx v30,$x50,$key
- ?vperm v28,v28,v29,$keyperm
- lvx v31,$x60,$key
- ?vperm v29,v29,v30,$keyperm
- lvx $out0,$x70,$key # borrow $out0
- ?vperm v30,v30,v31,$keyperm
- lvx v24,$x00,$key_ # pre-load round[1]
- ?vperm v31,v31,$out0,$keyperm
- lvx v25,$x10,$key_ # pre-load round[2]
-
- vadduwm $two,$one,$one
- subi $inp,$inp,15 # undo "caller"
- $SHL $len,$len,4
-
- vadduwm $out1,$ivec,$one # counter values ...
- vadduwm $out2,$ivec,$two
- vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
- le?li $idx,8
- vadduwm $out3,$out1,$two
- vxor $out1,$out1,$rndkey0
- le?lvsl $inpperm,0,$idx
- vadduwm $out4,$out2,$two
- vxor $out2,$out2,$rndkey0
- le?vspltisb $tmp,0x0f
- vadduwm $out5,$out3,$two
- vxor $out3,$out3,$rndkey0
- le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
- vadduwm $out6,$out4,$two
- vxor $out4,$out4,$rndkey0
- vadduwm $out7,$out5,$two
- vxor $out5,$out5,$rndkey0
- vadduwm $ivec,$out6,$two # next counter value
- vxor $out6,$out6,$rndkey0
- vxor $out7,$out7,$rndkey0
-
- mtctr $rounds
- b Loop_ctr32_enc8x
-.align 5
-Loop_ctr32_enc8x:
- vcipher $out0,$out0,v24
- vcipher $out1,$out1,v24
- vcipher $out2,$out2,v24
- vcipher $out3,$out3,v24
- vcipher $out4,$out4,v24
- vcipher $out5,$out5,v24
- vcipher $out6,$out6,v24
- vcipher $out7,$out7,v24
-Loop_ctr32_enc8x_middle:
- lvx v24,$x20,$key_ # round[3]
- addi $key_,$key_,0x20
-
- vcipher $out0,$out0,v25
- vcipher $out1,$out1,v25
- vcipher $out2,$out2,v25
- vcipher $out3,$out3,v25
- vcipher $out4,$out4,v25
- vcipher $out5,$out5,v25
- vcipher $out6,$out6,v25
- vcipher $out7,$out7,v25
- lvx v25,$x10,$key_ # round[4]
- bdnz Loop_ctr32_enc8x
-
- subic r11,$len,256 # $len-256, borrow $key_
- vcipher $out0,$out0,v24
- vcipher $out1,$out1,v24
- vcipher $out2,$out2,v24
- vcipher $out3,$out3,v24
- vcipher $out4,$out4,v24
- vcipher $out5,$out5,v24
- vcipher $out6,$out6,v24
- vcipher $out7,$out7,v24
-
- subfe r0,r0,r0 # borrow?-1:0
- vcipher $out0,$out0,v25
- vcipher $out1,$out1,v25
- vcipher $out2,$out2,v25
- vcipher $out3,$out3,v25
- vcipher $out4,$out4,v25
- vcipher $out5,$out5,v25
- vcipher $out6,$out6,v25
- vcipher $out7,$out7,v25
-
- and r0,r0,r11
- addi $key_,$sp,`$FRAME+15` # rewind $key_
- vcipher $out0,$out0,v26
- vcipher $out1,$out1,v26
- vcipher $out2,$out2,v26
- vcipher $out3,$out3,v26
- vcipher $out4,$out4,v26
- vcipher $out5,$out5,v26
- vcipher $out6,$out6,v26
- vcipher $out7,$out7,v26
- lvx v24,$x00,$key_ # re-pre-load round[1]
-
- subic $len,$len,129 # $len-=129
- vcipher $out0,$out0,v27
- addi $len,$len,1 # $len-=128 really
- vcipher $out1,$out1,v27
- vcipher $out2,$out2,v27
- vcipher $out3,$out3,v27
- vcipher $out4,$out4,v27
- vcipher $out5,$out5,v27
- vcipher $out6,$out6,v27
- vcipher $out7,$out7,v27
- lvx v25,$x10,$key_ # re-pre-load round[2]
-
- vcipher $out0,$out0,v28
- lvx_u $in0,$x00,$inp # load input
- vcipher $out1,$out1,v28
- lvx_u $in1,$x10,$inp
- vcipher $out2,$out2,v28
- lvx_u $in2,$x20,$inp
- vcipher $out3,$out3,v28
- lvx_u $in3,$x30,$inp
- vcipher $out4,$out4,v28
- lvx_u $in4,$x40,$inp
- vcipher $out5,$out5,v28
- lvx_u $in5,$x50,$inp
- vcipher $out6,$out6,v28
- lvx_u $in6,$x60,$inp
- vcipher $out7,$out7,v28
- lvx_u $in7,$x70,$inp
- addi $inp,$inp,0x80
-
- vcipher $out0,$out0,v29
- le?vperm $in0,$in0,$in0,$inpperm
- vcipher $out1,$out1,v29
- le?vperm $in1,$in1,$in1,$inpperm
- vcipher $out2,$out2,v29
- le?vperm $in2,$in2,$in2,$inpperm
- vcipher $out3,$out3,v29
- le?vperm $in3,$in3,$in3,$inpperm
- vcipher $out4,$out4,v29
- le?vperm $in4,$in4,$in4,$inpperm
- vcipher $out5,$out5,v29
- le?vperm $in5,$in5,$in5,$inpperm
- vcipher $out6,$out6,v29
- le?vperm $in6,$in6,$in6,$inpperm
- vcipher $out7,$out7,v29
- le?vperm $in7,$in7,$in7,$inpperm
-
- add $inp,$inp,r0 # $inp is adjusted in such
- # way that at exit from the
- # loop inX-in7 are loaded
- # with last "words"
- subfe. r0,r0,r0 # borrow?-1:0
- vcipher $out0,$out0,v30
- vxor $in0,$in0,v31 # xor with last round key
- vcipher $out1,$out1,v30
- vxor $in1,$in1,v31
- vcipher $out2,$out2,v30
- vxor $in2,$in2,v31
- vcipher $out3,$out3,v30
- vxor $in3,$in3,v31
- vcipher $out4,$out4,v30
- vxor $in4,$in4,v31
- vcipher $out5,$out5,v30
- vxor $in5,$in5,v31
- vcipher $out6,$out6,v30
- vxor $in6,$in6,v31
- vcipher $out7,$out7,v30
- vxor $in7,$in7,v31
-
- bne Lctr32_enc8x_break # did $len-129 borrow?
-
- vcipherlast $in0,$out0,$in0
- vcipherlast $in1,$out1,$in1
- vadduwm $out1,$ivec,$one # counter values ...
- vcipherlast $in2,$out2,$in2
- vadduwm $out2,$ivec,$two
- vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
- vcipherlast $in3,$out3,$in3
- vadduwm $out3,$out1,$two
- vxor $out1,$out1,$rndkey0
- vcipherlast $in4,$out4,$in4
- vadduwm $out4,$out2,$two
- vxor $out2,$out2,$rndkey0
- vcipherlast $in5,$out5,$in5
- vadduwm $out5,$out3,$two
- vxor $out3,$out3,$rndkey0
- vcipherlast $in6,$out6,$in6
- vadduwm $out6,$out4,$two
- vxor $out4,$out4,$rndkey0
- vcipherlast $in7,$out7,$in7
- vadduwm $out7,$out5,$two
- vxor $out5,$out5,$rndkey0
- le?vperm $in0,$in0,$in0,$inpperm
- vadduwm $ivec,$out6,$two # next counter value
- vxor $out6,$out6,$rndkey0
- le?vperm $in1,$in1,$in1,$inpperm
- vxor $out7,$out7,$rndkey0
- mtctr $rounds
-
- vcipher $out0,$out0,v24
- stvx_u $in0,$x00,$out
- le?vperm $in2,$in2,$in2,$inpperm
- vcipher $out1,$out1,v24
- stvx_u $in1,$x10,$out
- le?vperm $in3,$in3,$in3,$inpperm
- vcipher $out2,$out2,v24
- stvx_u $in2,$x20,$out
- le?vperm $in4,$in4,$in4,$inpperm
- vcipher $out3,$out3,v24
- stvx_u $in3,$x30,$out
- le?vperm $in5,$in5,$in5,$inpperm
- vcipher $out4,$out4,v24
- stvx_u $in4,$x40,$out
- le?vperm $in6,$in6,$in6,$inpperm
- vcipher $out5,$out5,v24
- stvx_u $in5,$x50,$out
- le?vperm $in7,$in7,$in7,$inpperm
- vcipher $out6,$out6,v24
- stvx_u $in6,$x60,$out
- vcipher $out7,$out7,v24
- stvx_u $in7,$x70,$out
- addi $out,$out,0x80
-
- b Loop_ctr32_enc8x_middle
-
-.align 5
-Lctr32_enc8x_break:
- cmpwi $len,-0x60
- blt Lctr32_enc8x_one
- nop
- beq Lctr32_enc8x_two
- cmpwi $len,-0x40
- blt Lctr32_enc8x_three
- nop
- beq Lctr32_enc8x_four
- cmpwi $len,-0x20
- blt Lctr32_enc8x_five
- nop
- beq Lctr32_enc8x_six
- cmpwi $len,0x00
- blt Lctr32_enc8x_seven
-
-Lctr32_enc8x_eight:
- vcipherlast $out0,$out0,$in0
- vcipherlast $out1,$out1,$in1
- vcipherlast $out2,$out2,$in2
- vcipherlast $out3,$out3,$in3
- vcipherlast $out4,$out4,$in4
- vcipherlast $out5,$out5,$in5
- vcipherlast $out6,$out6,$in6
- vcipherlast $out7,$out7,$in7
-
- le?vperm $out0,$out0,$out0,$inpperm
- le?vperm $out1,$out1,$out1,$inpperm
- stvx_u $out0,$x00,$out
- le?vperm $out2,$out2,$out2,$inpperm
- stvx_u $out1,$x10,$out
- le?vperm $out3,$out3,$out3,$inpperm
- stvx_u $out2,$x20,$out
- le?vperm $out4,$out4,$out4,$inpperm
- stvx_u $out3,$x30,$out
- le?vperm $out5,$out5,$out5,$inpperm
- stvx_u $out4,$x40,$out
- le?vperm $out6,$out6,$out6,$inpperm
- stvx_u $out5,$x50,$out
- le?vperm $out7,$out7,$out7,$inpperm
- stvx_u $out6,$x60,$out
- stvx_u $out7,$x70,$out
- addi $out,$out,0x80
- b Lctr32_enc8x_done
-
-.align 5
-Lctr32_enc8x_seven:
- vcipherlast $out0,$out0,$in1
- vcipherlast $out1,$out1,$in2
- vcipherlast $out2,$out2,$in3
- vcipherlast $out3,$out3,$in4
- vcipherlast $out4,$out4,$in5
- vcipherlast $out5,$out5,$in6
- vcipherlast $out6,$out6,$in7
-
- le?vperm $out0,$out0,$out0,$inpperm
- le?vperm $out1,$out1,$out1,$inpperm
- stvx_u $out0,$x00,$out
- le?vperm $out2,$out2,$out2,$inpperm
- stvx_u $out1,$x10,$out
- le?vperm $out3,$out3,$out3,$inpperm
- stvx_u $out2,$x20,$out
- le?vperm $out4,$out4,$out4,$inpperm
- stvx_u $out3,$x30,$out
- le?vperm $out5,$out5,$out5,$inpperm
- stvx_u $out4,$x40,$out
- le?vperm $out6,$out6,$out6,$inpperm
- stvx_u $out5,$x50,$out
- stvx_u $out6,$x60,$out
- addi $out,$out,0x70
- b Lctr32_enc8x_done
-
-.align 5
-Lctr32_enc8x_six:
- vcipherlast $out0,$out0,$in2
- vcipherlast $out1,$out1,$in3
- vcipherlast $out2,$out2,$in4
- vcipherlast $out3,$out3,$in5
- vcipherlast $out4,$out4,$in6
- vcipherlast $out5,$out5,$in7
-
- le?vperm $out0,$out0,$out0,$inpperm
- le?vperm $out1,$out1,$out1,$inpperm
- stvx_u $out0,$x00,$out
- le?vperm $out2,$out2,$out2,$inpperm
- stvx_u $out1,$x10,$out
- le?vperm $out3,$out3,$out3,$inpperm
- stvx_u $out2,$x20,$out
- le?vperm $out4,$out4,$out4,$inpperm
- stvx_u $out3,$x30,$out
- le?vperm $out5,$out5,$out5,$inpperm
- stvx_u $out4,$x40,$out
- stvx_u $out5,$x50,$out
- addi $out,$out,0x60
- b Lctr32_enc8x_done
-
-.align 5
-Lctr32_enc8x_five:
- vcipherlast $out0,$out0,$in3
- vcipherlast $out1,$out1,$in4
- vcipherlast $out2,$out2,$in5
- vcipherlast $out3,$out3,$in6
- vcipherlast $out4,$out4,$in7
-
- le?vperm $out0,$out0,$out0,$inpperm
- le?vperm $out1,$out1,$out1,$inpperm
- stvx_u $out0,$x00,$out
- le?vperm $out2,$out2,$out2,$inpperm
- stvx_u $out1,$x10,$out
- le?vperm $out3,$out3,$out3,$inpperm
- stvx_u $out2,$x20,$out
- le?vperm $out4,$out4,$out4,$inpperm
- stvx_u $out3,$x30,$out
- stvx_u $out4,$x40,$out
- addi $out,$out,0x50
- b Lctr32_enc8x_done
-
-.align 5
-Lctr32_enc8x_four:
- vcipherlast $out0,$out0,$in4
- vcipherlast $out1,$out1,$in5
- vcipherlast $out2,$out2,$in6
- vcipherlast $out3,$out3,$in7
-
- le?vperm $out0,$out0,$out0,$inpperm
- le?vperm $out1,$out1,$out1,$inpperm
- stvx_u $out0,$x00,$out
- le?vperm $out2,$out2,$out2,$inpperm
- stvx_u $out1,$x10,$out
- le?vperm $out3,$out3,$out3,$inpperm
- stvx_u $out2,$x20,$out
- stvx_u $out3,$x30,$out
- addi $out,$out,0x40
- b Lctr32_enc8x_done
-
-.align 5
-Lctr32_enc8x_three:
- vcipherlast $out0,$out0,$in5
- vcipherlast $out1,$out1,$in6
- vcipherlast $out2,$out2,$in7
-
- le?vperm $out0,$out0,$out0,$inpperm
- le?vperm $out1,$out1,$out1,$inpperm
- stvx_u $out0,$x00,$out
- le?vperm $out2,$out2,$out2,$inpperm
- stvx_u $out1,$x10,$out
- stvx_u $out2,$x20,$out
- addi $out,$out,0x30
- b Lctr32_enc8x_done
-
-.align 5
-Lctr32_enc8x_two:
- vcipherlast $out0,$out0,$in6
- vcipherlast $out1,$out1,$in7
-
- le?vperm $out0,$out0,$out0,$inpperm
- le?vperm $out1,$out1,$out1,$inpperm
- stvx_u $out0,$x00,$out
- stvx_u $out1,$x10,$out
- addi $out,$out,0x20
- b Lctr32_enc8x_done
-
-.align 5
-Lctr32_enc8x_one:
- vcipherlast $out0,$out0,$in7
-
- le?vperm $out0,$out0,$out0,$inpperm
- stvx_u $out0,0,$out
- addi $out,$out,0x10
-
-Lctr32_enc8x_done:
- li r10,`$FRAME+15`
- li r11,`$FRAME+31`
- stvx $inpperm,r10,$sp # wipe copies of round keys
- addi r10,r10,32
- stvx $inpperm,r11,$sp
- addi r11,r11,32
- stvx $inpperm,r10,$sp
- addi r10,r10,32
- stvx $inpperm,r11,$sp
- addi r11,r11,32
- stvx $inpperm,r10,$sp
- addi r10,r10,32
- stvx $inpperm,r11,$sp
- addi r11,r11,32
- stvx $inpperm,r10,$sp
- addi r10,r10,32
- stvx $inpperm,r11,$sp
- addi r11,r11,32
-
- mtspr 256,$vrsave
- lvx v20,r10,$sp # ABI says so
- addi r10,r10,32
- lvx v21,r11,$sp
- addi r11,r11,32
- lvx v22,r10,$sp
- addi r10,r10,32
- lvx v23,r11,$sp
- addi r11,r11,32
- lvx v24,r10,$sp
- addi r10,r10,32
- lvx v25,r11,$sp
- addi r11,r11,32
- lvx v26,r10,$sp
- addi r10,r10,32
- lvx v27,r11,$sp
- addi r11,r11,32
- lvx v28,r10,$sp
- addi r10,r10,32
- lvx v29,r11,$sp
- addi r11,r11,32
- lvx v30,r10,$sp
- lvx v31,r11,$sp
- $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
- $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
- $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
- $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
- $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
- $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
- addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
- blr
- .long 0
- .byte 0,12,0x04,0,0x80,6,6,0
- .long 0
-.size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
-___
-}} }}}
-
-#########################################################################
-{{{ # XTS procedures #
-# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, #
-# const AES_KEY *key1, const AES_KEY *key2, #
-# [const] unsigned char iv[16]); #
-# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which #
-# input tweak value is assumed to be encrypted already, and last tweak #
-# value, one suitable for consecutive call on same chunk of data, is #
-# written back to original buffer. In addition, in "tweak chaining" #
-# mode only complete input blocks are processed. #
-
-my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10));
-my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2));
-my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7));
-my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12));
-my $taillen = $key2;
-
- ($inp,$idx) = ($idx,$inp); # reassign
-
-$code.=<<___;
-.globl .${prefix}_xts_encrypt
-.align 5
-.${prefix}_xts_encrypt:
- mr $inp,r3 # reassign
- li r3,-1
- ${UCMP}i $len,16
- bltlr-
-
- lis r0,0xfff0
- mfspr r12,256 # save vrsave
- li r11,0
- mtspr 256,r0
-
- vspltisb $seven,0x07 # 0x070707..07
- le?lvsl $leperm,r11,r11
- le?vspltisb $tmp,0x0f
- le?vxor $leperm,$leperm,$seven
-
- li $idx,15
- lvx $tweak,0,$ivp # load [unaligned] iv
- lvsl $inpperm,0,$ivp
- lvx $inptail,$idx,$ivp
- le?vxor $inpperm,$inpperm,$tmp
- vperm $tweak,$tweak,$inptail,$inpperm
-
- neg r11,$inp
- lvsr $inpperm,0,r11 # prepare for unaligned load
- lvx $inout,0,$inp
- addi $inp,$inp,15 # 15 is not typo
- le?vxor $inpperm,$inpperm,$tmp
-
- ${UCMP}i $key2,0 # key2==NULL?
- beq Lxts_enc_no_key2
-
- ?lvsl $keyperm,0,$key2 # prepare for unaligned key
- lwz $rounds,240($key2)
- srwi $rounds,$rounds,1
- subi $rounds,$rounds,1
- li $idx,16
-
- lvx $rndkey0,0,$key2
- lvx $rndkey1,$idx,$key2
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $tweak,$tweak,$rndkey0
- lvx $rndkey0,$idx,$key2
- addi $idx,$idx,16
- mtctr $rounds
-
-Ltweak_xts_enc:
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vcipher $tweak,$tweak,$rndkey1
- lvx $rndkey1,$idx,$key2
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vcipher $tweak,$tweak,$rndkey0
- lvx $rndkey0,$idx,$key2
- addi $idx,$idx,16
- bdnz Ltweak_xts_enc
-
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vcipher $tweak,$tweak,$rndkey1
- lvx $rndkey1,$idx,$key2
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vcipherlast $tweak,$tweak,$rndkey0
-
- li $ivp,0 # don't chain the tweak
- b Lxts_enc
-
-Lxts_enc_no_key2:
- li $idx,-16
- and $len,$len,$idx # in "tweak chaining"
- # mode only complete
- # blocks are processed
-Lxts_enc:
- lvx $inptail,0,$inp
- addi $inp,$inp,16
-
- ?lvsl $keyperm,0,$key1 # prepare for unaligned key
- lwz $rounds,240($key1)
- srwi $rounds,$rounds,1
- subi $rounds,$rounds,1
- li $idx,16
-
- vslb $eighty7,$seven,$seven # 0x808080..80
- vor $eighty7,$eighty7,$seven # 0x878787..87
- vspltisb $tmp,1 # 0x010101..01
- vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
-
- ${UCMP}i $len,96
- bge _aesp8_xts_encrypt6x
-
- andi. $taillen,$len,15
- subic r0,$len,32
- subi $taillen,$taillen,16
- subfe r0,r0,r0
- and r0,r0,$taillen
- add $inp,$inp,r0
-
- lvx $rndkey0,0,$key1
- lvx $rndkey1,$idx,$key1
- addi $idx,$idx,16
- vperm $inout,$inout,$inptail,$inpperm
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $inout,$inout,$tweak
- vxor $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key1
- addi $idx,$idx,16
- mtctr $rounds
- b Loop_xts_enc
-
-.align 5
-Loop_xts_enc:
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vcipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key1
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vcipher $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key1
- addi $idx,$idx,16
- bdnz Loop_xts_enc
-
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vcipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key1
- li $idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $rndkey0,$rndkey0,$tweak
- vcipherlast $output,$inout,$rndkey0
-
- le?vperm $tmp,$output,$output,$leperm
- be?nop
- le?stvx_u $tmp,0,$out
- be?stvx_u $output,0,$out
- addi $out,$out,16
-
- subic. $len,$len,16
- beq Lxts_enc_done
-
- vmr $inout,$inptail
- lvx $inptail,0,$inp
- addi $inp,$inp,16
- lvx $rndkey0,0,$key1
- lvx $rndkey1,$idx,$key1
- addi $idx,$idx,16
-
- subic r0,$len,32
- subfe r0,r0,r0
- and r0,r0,$taillen
- add $inp,$inp,r0
-
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- vand $tmp,$tmp,$eighty7
- vxor $tweak,$tweak,$tmp
-
- vperm $inout,$inout,$inptail,$inpperm
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $inout,$inout,$tweak
- vxor $output,$output,$rndkey0 # just in case $len<16
- vxor $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key1
- addi $idx,$idx,16
-
- mtctr $rounds
- ${UCMP}i $len,16
- bge Loop_xts_enc
-
- vxor $output,$output,$tweak
- lvsr $inpperm,0,$len # $inpperm is no longer needed
- vxor $inptail,$inptail,$inptail # $inptail is no longer needed
- vspltisb $tmp,-1
- vperm $inptail,$inptail,$tmp,$inpperm
- vsel $inout,$inout,$output,$inptail
-
- subi r11,$out,17
- subi $out,$out,16
- mtctr $len
- li $len,16
-Loop_xts_enc_steal:
- lbzu r0,1(r11)
- stb r0,16(r11)
- bdnz Loop_xts_enc_steal
-
- mtctr $rounds
- b Loop_xts_enc # one more time...
-
-Lxts_enc_done:
- ${UCMP}i $ivp,0
- beq Lxts_enc_ret
-
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- vand $tmp,$tmp,$eighty7
- vxor $tweak,$tweak,$tmp
-
- le?vperm $tweak,$tweak,$tweak,$leperm
- stvx_u $tweak,0,$ivp
-
-Lxts_enc_ret:
- mtspr 256,r12 # restore vrsave
- li r3,0
- blr
- .long 0
- .byte 0,12,0x04,0,0x80,6,6,0
- .long 0
-.size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
-
-.globl .${prefix}_xts_decrypt
-.align 5
-.${prefix}_xts_decrypt:
- mr $inp,r3 # reassign
- li r3,-1
- ${UCMP}i $len,16
- bltlr-
-
- lis r0,0xfff8
- mfspr r12,256 # save vrsave
- li r11,0
- mtspr 256,r0
-
- andi. r0,$len,15
- neg r0,r0
- andi. r0,r0,16
- sub $len,$len,r0
-
- vspltisb $seven,0x07 # 0x070707..07
- le?lvsl $leperm,r11,r11
- le?vspltisb $tmp,0x0f
- le?vxor $leperm,$leperm,$seven
-
- li $idx,15
- lvx $tweak,0,$ivp # load [unaligned] iv
- lvsl $inpperm,0,$ivp
- lvx $inptail,$idx,$ivp
- le?vxor $inpperm,$inpperm,$tmp
- vperm $tweak,$tweak,$inptail,$inpperm
-
- neg r11,$inp
- lvsr $inpperm,0,r11 # prepare for unaligned load
- lvx $inout,0,$inp
- addi $inp,$inp,15 # 15 is not typo
- le?vxor $inpperm,$inpperm,$tmp
-
- ${UCMP}i $key2,0 # key2==NULL?
- beq Lxts_dec_no_key2
-
- ?lvsl $keyperm,0,$key2 # prepare for unaligned key
- lwz $rounds,240($key2)
- srwi $rounds,$rounds,1
- subi $rounds,$rounds,1
- li $idx,16
-
- lvx $rndkey0,0,$key2
- lvx $rndkey1,$idx,$key2
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $tweak,$tweak,$rndkey0
- lvx $rndkey0,$idx,$key2
- addi $idx,$idx,16
- mtctr $rounds
-
-Ltweak_xts_dec:
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vcipher $tweak,$tweak,$rndkey1
- lvx $rndkey1,$idx,$key2
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vcipher $tweak,$tweak,$rndkey0
- lvx $rndkey0,$idx,$key2
- addi $idx,$idx,16
- bdnz Ltweak_xts_dec
-
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vcipher $tweak,$tweak,$rndkey1
- lvx $rndkey1,$idx,$key2
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vcipherlast $tweak,$tweak,$rndkey0
-
- li $ivp,0 # don't chain the tweak
- b Lxts_dec
-
-Lxts_dec_no_key2:
- neg $idx,$len
- andi. $idx,$idx,15
- add $len,$len,$idx # in "tweak chaining"
- # mode only complete
- # blocks are processed
-Lxts_dec:
- lvx $inptail,0,$inp
- addi $inp,$inp,16
-
- ?lvsl $keyperm,0,$key1 # prepare for unaligned key
- lwz $rounds,240($key1)
- srwi $rounds,$rounds,1
- subi $rounds,$rounds,1
- li $idx,16
-
- vslb $eighty7,$seven,$seven # 0x808080..80
- vor $eighty7,$eighty7,$seven # 0x878787..87
- vspltisb $tmp,1 # 0x010101..01
- vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
-
- ${UCMP}i $len,96
- bge _aesp8_xts_decrypt6x
-
- lvx $rndkey0,0,$key1
- lvx $rndkey1,$idx,$key1
- addi $idx,$idx,16
- vperm $inout,$inout,$inptail,$inpperm
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $inout,$inout,$tweak
- vxor $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key1
- addi $idx,$idx,16
- mtctr $rounds
-
- ${UCMP}i $len,16
- blt Ltail_xts_dec
- be?b Loop_xts_dec
-
-.align 5
-Loop_xts_dec:
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vncipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key1
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vncipher $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key1
- addi $idx,$idx,16
- bdnz Loop_xts_dec
-
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vncipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key1
- li $idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $rndkey0,$rndkey0,$tweak
- vncipherlast $output,$inout,$rndkey0
-
- le?vperm $tmp,$output,$output,$leperm
- be?nop
- le?stvx_u $tmp,0,$out
- be?stvx_u $output,0,$out
- addi $out,$out,16
-
- subic. $len,$len,16
- beq Lxts_dec_done
-
- vmr $inout,$inptail
- lvx $inptail,0,$inp
- addi $inp,$inp,16
- lvx $rndkey0,0,$key1
- lvx $rndkey1,$idx,$key1
- addi $idx,$idx,16
-
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- vand $tmp,$tmp,$eighty7
- vxor $tweak,$tweak,$tmp
-
- vperm $inout,$inout,$inptail,$inpperm
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $inout,$inout,$tweak
- vxor $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key1
- addi $idx,$idx,16
-
- mtctr $rounds
- ${UCMP}i $len,16
- bge Loop_xts_dec
-
-Ltail_xts_dec:
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak1,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- vand $tmp,$tmp,$eighty7
- vxor $tweak1,$tweak1,$tmp
-
- subi $inp,$inp,16
- add $inp,$inp,$len
-
- vxor $inout,$inout,$tweak # :-(
- vxor $inout,$inout,$tweak1 # :-)
-
-Loop_xts_dec_short:
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vncipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key1
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vncipher $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key1
- addi $idx,$idx,16
- bdnz Loop_xts_dec_short
-
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vncipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key1
- li $idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $rndkey0,$rndkey0,$tweak1
- vncipherlast $output,$inout,$rndkey0
-
- le?vperm $tmp,$output,$output,$leperm
- be?nop
- le?stvx_u $tmp,0,$out
- be?stvx_u $output,0,$out
-
- vmr $inout,$inptail
- lvx $inptail,0,$inp
- #addi $inp,$inp,16
- lvx $rndkey0,0,$key1
- lvx $rndkey1,$idx,$key1
- addi $idx,$idx,16
- vperm $inout,$inout,$inptail,$inpperm
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
-
- lvsr $inpperm,0,$len # $inpperm is no longer needed
- vxor $inptail,$inptail,$inptail # $inptail is no longer needed
- vspltisb $tmp,-1
- vperm $inptail,$inptail,$tmp,$inpperm
- vsel $inout,$inout,$output,$inptail
-
- vxor $rndkey0,$rndkey0,$tweak
- vxor $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key1
- addi $idx,$idx,16
-
- subi r11,$out,1
- mtctr $len
- li $len,16
-Loop_xts_dec_steal:
- lbzu r0,1(r11)
- stb r0,16(r11)
- bdnz Loop_xts_dec_steal
-
- mtctr $rounds
- b Loop_xts_dec # one more time...
-
-Lxts_dec_done:
- ${UCMP}i $ivp,0
- beq Lxts_dec_ret
-
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- vand $tmp,$tmp,$eighty7
- vxor $tweak,$tweak,$tmp
-
- le?vperm $tweak,$tweak,$tweak,$leperm
- stvx_u $tweak,0,$ivp
-
-Lxts_dec_ret:
- mtspr 256,r12 # restore vrsave
- li r3,0
- blr
- .long 0
- .byte 0,12,0x04,0,0x80,6,6,0
- .long 0
-.size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
-___
-#########################################################################
-{{ # Optimized XTS procedures #
-my $key_=$key2;
-my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
- $x00=0 if ($flavour =~ /osx/);
-my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5));
-my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
-my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
-my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
- # v26-v31 last 6 round keys
-my ($keyperm)=($out0); # aliases with "caller", redundant assignment
-my $taillen=$x70;
-
-$code.=<<___;
-.align 5
-_aesp8_xts_encrypt6x:
- $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
- mflr r11
- li r7,`$FRAME+8*16+15`
- li r3,`$FRAME+8*16+31`
- $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
- stvx v20,r7,$sp # ABI says so
- addi r7,r7,32
- stvx v21,r3,$sp
- addi r3,r3,32
- stvx v22,r7,$sp
- addi r7,r7,32
- stvx v23,r3,$sp
- addi r3,r3,32
- stvx v24,r7,$sp
- addi r7,r7,32
- stvx v25,r3,$sp
- addi r3,r3,32
- stvx v26,r7,$sp
- addi r7,r7,32
- stvx v27,r3,$sp
- addi r3,r3,32
- stvx v28,r7,$sp
- addi r7,r7,32
- stvx v29,r3,$sp
- addi r3,r3,32
- stvx v30,r7,$sp
- stvx v31,r3,$sp
- li r0,-1
- stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
- li $x10,0x10
- $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
- li $x20,0x20
- $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
- li $x30,0x30
- $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
- li $x40,0x40
- $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
- li $x50,0x50
- $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
- li $x60,0x60
- $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
- li $x70,0x70
- mtspr 256,r0
-
- subi $rounds,$rounds,3 # -4 in total
-
- lvx $rndkey0,$x00,$key1 # load key schedule
- lvx v30,$x10,$key1
- addi $key1,$key1,0x20
- lvx v31,$x00,$key1
- ?vperm $rndkey0,$rndkey0,v30,$keyperm
- addi $key_,$sp,`$FRAME+15`
- mtctr $rounds
-
-Load_xts_enc_key:
- ?vperm v24,v30,v31,$keyperm
- lvx v30,$x10,$key1
- addi $key1,$key1,0x20
- stvx v24,$x00,$key_ # off-load round[1]
- ?vperm v25,v31,v30,$keyperm
- lvx v31,$x00,$key1
- stvx v25,$x10,$key_ # off-load round[2]
- addi $key_,$key_,0x20
- bdnz Load_xts_enc_key
-
- lvx v26,$x10,$key1
- ?vperm v24,v30,v31,$keyperm
- lvx v27,$x20,$key1
- stvx v24,$x00,$key_ # off-load round[3]
- ?vperm v25,v31,v26,$keyperm
- lvx v28,$x30,$key1
- stvx v25,$x10,$key_ # off-load round[4]
- addi $key_,$sp,`$FRAME+15` # rewind $key_
- ?vperm v26,v26,v27,$keyperm
- lvx v29,$x40,$key1
- ?vperm v27,v27,v28,$keyperm
- lvx v30,$x50,$key1
- ?vperm v28,v28,v29,$keyperm
- lvx v31,$x60,$key1
- ?vperm v29,v29,v30,$keyperm
- lvx $twk5,$x70,$key1 # borrow $twk5
- ?vperm v30,v30,v31,$keyperm
- lvx v24,$x00,$key_ # pre-load round[1]
- ?vperm v31,v31,$twk5,$keyperm
- lvx v25,$x10,$key_ # pre-load round[2]
-
- vperm $in0,$inout,$inptail,$inpperm
- subi $inp,$inp,31 # undo "caller"
- vxor $twk0,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- vand $tmp,$tmp,$eighty7
- vxor $out0,$in0,$twk0
- vxor $tweak,$tweak,$tmp
-
- lvx_u $in1,$x10,$inp
- vxor $twk1,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- le?vperm $in1,$in1,$in1,$leperm
- vand $tmp,$tmp,$eighty7
- vxor $out1,$in1,$twk1
- vxor $tweak,$tweak,$tmp
-
- lvx_u $in2,$x20,$inp
- andi. $taillen,$len,15
- vxor $twk2,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- le?vperm $in2,$in2,$in2,$leperm
- vand $tmp,$tmp,$eighty7
- vxor $out2,$in2,$twk2
- vxor $tweak,$tweak,$tmp
-
- lvx_u $in3,$x30,$inp
- sub $len,$len,$taillen
- vxor $twk3,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- le?vperm $in3,$in3,$in3,$leperm
- vand $tmp,$tmp,$eighty7
- vxor $out3,$in3,$twk3
- vxor $tweak,$tweak,$tmp
-
- lvx_u $in4,$x40,$inp
- subi $len,$len,0x60
- vxor $twk4,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- le?vperm $in4,$in4,$in4,$leperm
- vand $tmp,$tmp,$eighty7
- vxor $out4,$in4,$twk4
- vxor $tweak,$tweak,$tmp
-
- lvx_u $in5,$x50,$inp
- addi $inp,$inp,0x60
- vxor $twk5,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- le?vperm $in5,$in5,$in5,$leperm
- vand $tmp,$tmp,$eighty7
- vxor $out5,$in5,$twk5
- vxor $tweak,$tweak,$tmp
-
- vxor v31,v31,$rndkey0
- mtctr $rounds
- b Loop_xts_enc6x
-
-.align 5
-Loop_xts_enc6x:
- vcipher $out0,$out0,v24
- vcipher $out1,$out1,v24
- vcipher $out2,$out2,v24
- vcipher $out3,$out3,v24
- vcipher $out4,$out4,v24
- vcipher $out5,$out5,v24
- lvx v24,$x20,$key_ # round[3]
- addi $key_,$key_,0x20
-
- vcipher $out0,$out0,v25
- vcipher $out1,$out1,v25
- vcipher $out2,$out2,v25
- vcipher $out3,$out3,v25
- vcipher $out4,$out4,v25
- vcipher $out5,$out5,v25
- lvx v25,$x10,$key_ # round[4]
- bdnz Loop_xts_enc6x
-
- subic $len,$len,96 # $len-=96
- vxor $in0,$twk0,v31 # xor with last round key
- vcipher $out0,$out0,v24
- vcipher $out1,$out1,v24
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk0,$tweak,$rndkey0
- vaddubm $tweak,$tweak,$tweak
- vcipher $out2,$out2,v24
- vcipher $out3,$out3,v24
- vsldoi $tmp,$tmp,$tmp,15
- vcipher $out4,$out4,v24
- vcipher $out5,$out5,v24
-
- subfe. r0,r0,r0 # borrow?-1:0
- vand $tmp,$tmp,$eighty7
- vcipher $out0,$out0,v25
- vcipher $out1,$out1,v25
- vxor $tweak,$tweak,$tmp
- vcipher $out2,$out2,v25
- vcipher $out3,$out3,v25
- vxor $in1,$twk1,v31
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk1,$tweak,$rndkey0
- vcipher $out4,$out4,v25
- vcipher $out5,$out5,v25
-
- and r0,r0,$len
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- vcipher $out0,$out0,v26
- vcipher $out1,$out1,v26
- vand $tmp,$tmp,$eighty7
- vcipher $out2,$out2,v26
- vcipher $out3,$out3,v26
- vxor $tweak,$tweak,$tmp
- vcipher $out4,$out4,v26
- vcipher $out5,$out5,v26
-
- add $inp,$inp,r0 # $inp is adjusted in such
- # way that at exit from the
- # loop inX-in5 are loaded
- # with last "words"
- vxor $in2,$twk2,v31
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk2,$tweak,$rndkey0
- vaddubm $tweak,$tweak,$tweak
- vcipher $out0,$out0,v27
- vcipher $out1,$out1,v27
- vsldoi $tmp,$tmp,$tmp,15
- vcipher $out2,$out2,v27
- vcipher $out3,$out3,v27
- vand $tmp,$tmp,$eighty7
- vcipher $out4,$out4,v27
- vcipher $out5,$out5,v27
-
- addi $key_,$sp,`$FRAME+15` # rewind $key_
- vxor $tweak,$tweak,$tmp
- vcipher $out0,$out0,v28
- vcipher $out1,$out1,v28
- vxor $in3,$twk3,v31
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk3,$tweak,$rndkey0
- vcipher $out2,$out2,v28
- vcipher $out3,$out3,v28
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- vcipher $out4,$out4,v28
- vcipher $out5,$out5,v28
- lvx v24,$x00,$key_ # re-pre-load round[1]
- vand $tmp,$tmp,$eighty7
-
- vcipher $out0,$out0,v29
- vcipher $out1,$out1,v29
- vxor $tweak,$tweak,$tmp
- vcipher $out2,$out2,v29
- vcipher $out3,$out3,v29
- vxor $in4,$twk4,v31
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk4,$tweak,$rndkey0
- vcipher $out4,$out4,v29
- vcipher $out5,$out5,v29
- lvx v25,$x10,$key_ # re-pre-load round[2]
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
-
- vcipher $out0,$out0,v30
- vcipher $out1,$out1,v30
- vand $tmp,$tmp,$eighty7
- vcipher $out2,$out2,v30
- vcipher $out3,$out3,v30
- vxor $tweak,$tweak,$tmp
- vcipher $out4,$out4,v30
- vcipher $out5,$out5,v30
- vxor $in5,$twk5,v31
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk5,$tweak,$rndkey0
-
- vcipherlast $out0,$out0,$in0
- lvx_u $in0,$x00,$inp # load next input block
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- vcipherlast $out1,$out1,$in1
- lvx_u $in1,$x10,$inp
- vcipherlast $out2,$out2,$in2
- le?vperm $in0,$in0,$in0,$leperm
- lvx_u $in2,$x20,$inp
- vand $tmp,$tmp,$eighty7
- vcipherlast $out3,$out3,$in3
- le?vperm $in1,$in1,$in1,$leperm
- lvx_u $in3,$x30,$inp
- vcipherlast $out4,$out4,$in4
- le?vperm $in2,$in2,$in2,$leperm
- lvx_u $in4,$x40,$inp
- vxor $tweak,$tweak,$tmp
- vcipherlast $tmp,$out5,$in5 # last block might be needed
- # in stealing mode
- le?vperm $in3,$in3,$in3,$leperm
- lvx_u $in5,$x50,$inp
- addi $inp,$inp,0x60
- le?vperm $in4,$in4,$in4,$leperm
- le?vperm $in5,$in5,$in5,$leperm
-
- le?vperm $out0,$out0,$out0,$leperm
- le?vperm $out1,$out1,$out1,$leperm
- stvx_u $out0,$x00,$out # store output
- vxor $out0,$in0,$twk0
- le?vperm $out2,$out2,$out2,$leperm
- stvx_u $out1,$x10,$out
- vxor $out1,$in1,$twk1
- le?vperm $out3,$out3,$out3,$leperm
- stvx_u $out2,$x20,$out
- vxor $out2,$in2,$twk2
- le?vperm $out4,$out4,$out4,$leperm
- stvx_u $out3,$x30,$out
- vxor $out3,$in3,$twk3
- le?vperm $out5,$tmp,$tmp,$leperm
- stvx_u $out4,$x40,$out
- vxor $out4,$in4,$twk4
- le?stvx_u $out5,$x50,$out
- be?stvx_u $tmp, $x50,$out
- vxor $out5,$in5,$twk5
- addi $out,$out,0x60
-
- mtctr $rounds
- beq Loop_xts_enc6x # did $len-=96 borrow?
-
- addic. $len,$len,0x60
- beq Lxts_enc6x_zero
- cmpwi $len,0x20
- blt Lxts_enc6x_one
- nop
- beq Lxts_enc6x_two
- cmpwi $len,0x40
- blt Lxts_enc6x_three
- nop
- beq Lxts_enc6x_four
-
-Lxts_enc6x_five:
- vxor $out0,$in1,$twk0
- vxor $out1,$in2,$twk1
- vxor $out2,$in3,$twk2
- vxor $out3,$in4,$twk3
- vxor $out4,$in5,$twk4
-
- bl _aesp8_xts_enc5x
-
- le?vperm $out0,$out0,$out0,$leperm
- vmr $twk0,$twk5 # unused tweak
- le?vperm $out1,$out1,$out1,$leperm
- stvx_u $out0,$x00,$out # store output
- le?vperm $out2,$out2,$out2,$leperm
- stvx_u $out1,$x10,$out
- le?vperm $out3,$out3,$out3,$leperm
- stvx_u $out2,$x20,$out
- vxor $tmp,$out4,$twk5 # last block prep for stealing
- le?vperm $out4,$out4,$out4,$leperm
- stvx_u $out3,$x30,$out
- stvx_u $out4,$x40,$out
- addi $out,$out,0x50
- bne Lxts_enc6x_steal
- b Lxts_enc6x_done
-
-.align 4
-Lxts_enc6x_four:
- vxor $out0,$in2,$twk0
- vxor $out1,$in3,$twk1
- vxor $out2,$in4,$twk2
- vxor $out3,$in5,$twk3
- vxor $out4,$out4,$out4
-
- bl _aesp8_xts_enc5x
-
- le?vperm $out0,$out0,$out0,$leperm
- vmr $twk0,$twk4 # unused tweak
- le?vperm $out1,$out1,$out1,$leperm
- stvx_u $out0,$x00,$out # store output
- le?vperm $out2,$out2,$out2,$leperm
- stvx_u $out1,$x10,$out
- vxor $tmp,$out3,$twk4 # last block prep for stealing
- le?vperm $out3,$out3,$out3,$leperm
- stvx_u $out2,$x20,$out
- stvx_u $out3,$x30,$out
- addi $out,$out,0x40
- bne Lxts_enc6x_steal
- b Lxts_enc6x_done
-
-.align 4
-Lxts_enc6x_three:
- vxor $out0,$in3,$twk0
- vxor $out1,$in4,$twk1
- vxor $out2,$in5,$twk2
- vxor $out3,$out3,$out3
- vxor $out4,$out4,$out4
-
- bl _aesp8_xts_enc5x
-
- le?vperm $out0,$out0,$out0,$leperm
- vmr $twk0,$twk3 # unused tweak
- le?vperm $out1,$out1,$out1,$leperm
- stvx_u $out0,$x00,$out # store output
- vxor $tmp,$out2,$twk3 # last block prep for stealing
- le?vperm $out2,$out2,$out2,$leperm
- stvx_u $out1,$x10,$out
- stvx_u $out2,$x20,$out
- addi $out,$out,0x30
- bne Lxts_enc6x_steal
- b Lxts_enc6x_done
-
-.align 4
-Lxts_enc6x_two:
- vxor $out0,$in4,$twk0
- vxor $out1,$in5,$twk1
- vxor $out2,$out2,$out2
- vxor $out3,$out3,$out3
- vxor $out4,$out4,$out4
-
- bl _aesp8_xts_enc5x
-
- le?vperm $out0,$out0,$out0,$leperm
- vmr $twk0,$twk2 # unused tweak
- vxor $tmp,$out1,$twk2 # last block prep for stealing
- le?vperm $out1,$out1,$out1,$leperm
- stvx_u $out0,$x00,$out # store output
- stvx_u $out1,$x10,$out
- addi $out,$out,0x20
- bne Lxts_enc6x_steal
- b Lxts_enc6x_done
-
-.align 4
-Lxts_enc6x_one:
- vxor $out0,$in5,$twk0
- nop
-Loop_xts_enc1x:
- vcipher $out0,$out0,v24
- lvx v24,$x20,$key_ # round[3]
- addi $key_,$key_,0x20
-
- vcipher $out0,$out0,v25
- lvx v25,$x10,$key_ # round[4]
- bdnz Loop_xts_enc1x
-
- add $inp,$inp,$taillen
- cmpwi $taillen,0
- vcipher $out0,$out0,v24
-
- subi $inp,$inp,16
- vcipher $out0,$out0,v25
-
- lvsr $inpperm,0,$taillen
- vcipher $out0,$out0,v26
-
- lvx_u $in0,0,$inp
- vcipher $out0,$out0,v27
-
- addi $key_,$sp,`$FRAME+15` # rewind $key_
- vcipher $out0,$out0,v28
- lvx v24,$x00,$key_ # re-pre-load round[1]
-
- vcipher $out0,$out0,v29
- lvx v25,$x10,$key_ # re-pre-load round[2]
- vxor $twk0,$twk0,v31
-
- le?vperm $in0,$in0,$in0,$leperm
- vcipher $out0,$out0,v30
-
- vperm $in0,$in0,$in0,$inpperm
- vcipherlast $out0,$out0,$twk0
-
- vmr $twk0,$twk1 # unused tweak
- vxor $tmp,$out0,$twk1 # last block prep for stealing
- le?vperm $out0,$out0,$out0,$leperm
- stvx_u $out0,$x00,$out # store output
- addi $out,$out,0x10
- bne Lxts_enc6x_steal
- b Lxts_enc6x_done
-
-.align 4
-Lxts_enc6x_zero:
- cmpwi $taillen,0
- beq Lxts_enc6x_done
-
- add $inp,$inp,$taillen
- subi $inp,$inp,16
- lvx_u $in0,0,$inp
- lvsr $inpperm,0,$taillen # $in5 is no more
- le?vperm $in0,$in0,$in0,$leperm
- vperm $in0,$in0,$in0,$inpperm
- vxor $tmp,$tmp,$twk0
-Lxts_enc6x_steal:
- vxor $in0,$in0,$twk0
- vxor $out0,$out0,$out0
- vspltisb $out1,-1
- vperm $out0,$out0,$out1,$inpperm
- vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember?
-
- subi r30,$out,17
- subi $out,$out,16
- mtctr $taillen
-Loop_xts_enc6x_steal:
- lbzu r0,1(r30)
- stb r0,16(r30)
- bdnz Loop_xts_enc6x_steal
-
- li $taillen,0
- mtctr $rounds
- b Loop_xts_enc1x # one more time...
-
-.align 4
-Lxts_enc6x_done:
- ${UCMP}i $ivp,0
- beq Lxts_enc6x_ret
-
- vxor $tweak,$twk0,$rndkey0
- le?vperm $tweak,$tweak,$tweak,$leperm
- stvx_u $tweak,0,$ivp
-
-Lxts_enc6x_ret:
- mtlr r11
- li r10,`$FRAME+15`
- li r11,`$FRAME+31`
- stvx $seven,r10,$sp # wipe copies of round keys
- addi r10,r10,32
- stvx $seven,r11,$sp
- addi r11,r11,32
- stvx $seven,r10,$sp
- addi r10,r10,32
- stvx $seven,r11,$sp
- addi r11,r11,32
- stvx $seven,r10,$sp
- addi r10,r10,32
- stvx $seven,r11,$sp
- addi r11,r11,32
- stvx $seven,r10,$sp
- addi r10,r10,32
- stvx $seven,r11,$sp
- addi r11,r11,32
-
- mtspr 256,$vrsave
- lvx v20,r10,$sp # ABI says so
- addi r10,r10,32
- lvx v21,r11,$sp
- addi r11,r11,32
- lvx v22,r10,$sp
- addi r10,r10,32
- lvx v23,r11,$sp
- addi r11,r11,32
- lvx v24,r10,$sp
- addi r10,r10,32
- lvx v25,r11,$sp
- addi r11,r11,32
- lvx v26,r10,$sp
- addi r10,r10,32
- lvx v27,r11,$sp
- addi r11,r11,32
- lvx v28,r10,$sp
- addi r10,r10,32
- lvx v29,r11,$sp
- addi r11,r11,32
- lvx v30,r10,$sp
- lvx v31,r11,$sp
- $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
- $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
- $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
- $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
- $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
- $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
- addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
- blr
- .long 0
- .byte 0,12,0x04,1,0x80,6,6,0
- .long 0
-
-.align 5
-_aesp8_xts_enc5x:
- vcipher $out0,$out0,v24
- vcipher $out1,$out1,v24
- vcipher $out2,$out2,v24
- vcipher $out3,$out3,v24
- vcipher $out4,$out4,v24
- lvx v24,$x20,$key_ # round[3]
- addi $key_,$key_,0x20
-
- vcipher $out0,$out0,v25
- vcipher $out1,$out1,v25
- vcipher $out2,$out2,v25
- vcipher $out3,$out3,v25
- vcipher $out4,$out4,v25
- lvx v25,$x10,$key_ # round[4]
- bdnz _aesp8_xts_enc5x
-
- add $inp,$inp,$taillen
- cmpwi $taillen,0
- vcipher $out0,$out0,v24
- vcipher $out1,$out1,v24
- vcipher $out2,$out2,v24
- vcipher $out3,$out3,v24
- vcipher $out4,$out4,v24
-
- subi $inp,$inp,16
- vcipher $out0,$out0,v25
- vcipher $out1,$out1,v25
- vcipher $out2,$out2,v25
- vcipher $out3,$out3,v25
- vcipher $out4,$out4,v25
- vxor $twk0,$twk0,v31
-
- vcipher $out0,$out0,v26
- lvsr $inpperm,0,$taillen # $in5 is no more
- vcipher $out1,$out1,v26
- vcipher $out2,$out2,v26
- vcipher $out3,$out3,v26
- vcipher $out4,$out4,v26
- vxor $in1,$twk1,v31
-
- vcipher $out0,$out0,v27
- lvx_u $in0,0,$inp
- vcipher $out1,$out1,v27
- vcipher $out2,$out2,v27
- vcipher $out3,$out3,v27
- vcipher $out4,$out4,v27
- vxor $in2,$twk2,v31
-
- addi $key_,$sp,`$FRAME+15` # rewind $key_
- vcipher $out0,$out0,v28
- vcipher $out1,$out1,v28
- vcipher $out2,$out2,v28
- vcipher $out3,$out3,v28
- vcipher $out4,$out4,v28
- lvx v24,$x00,$key_ # re-pre-load round[1]
- vxor $in3,$twk3,v31
-
- vcipher $out0,$out0,v29
- le?vperm $in0,$in0,$in0,$leperm
- vcipher $out1,$out1,v29
- vcipher $out2,$out2,v29
- vcipher $out3,$out3,v29
- vcipher $out4,$out4,v29
- lvx v25,$x10,$key_ # re-pre-load round[2]
- vxor $in4,$twk4,v31
-
- vcipher $out0,$out0,v30
- vperm $in0,$in0,$in0,$inpperm
- vcipher $out1,$out1,v30
- vcipher $out2,$out2,v30
- vcipher $out3,$out3,v30
- vcipher $out4,$out4,v30
-
- vcipherlast $out0,$out0,$twk0
- vcipherlast $out1,$out1,$in1
- vcipherlast $out2,$out2,$in2
- vcipherlast $out3,$out3,$in3
- vcipherlast $out4,$out4,$in4
- blr
- .long 0
- .byte 0,12,0x14,0,0,0,0,0
-
-.align 5
-_aesp8_xts_decrypt6x:
- $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
- mflr r11
- li r7,`$FRAME+8*16+15`
- li r3,`$FRAME+8*16+31`
- $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
- stvx v20,r7,$sp # ABI says so
- addi r7,r7,32
- stvx v21,r3,$sp
- addi r3,r3,32
- stvx v22,r7,$sp
- addi r7,r7,32
- stvx v23,r3,$sp
- addi r3,r3,32
- stvx v24,r7,$sp
- addi r7,r7,32
- stvx v25,r3,$sp
- addi r3,r3,32
- stvx v26,r7,$sp
- addi r7,r7,32
- stvx v27,r3,$sp
- addi r3,r3,32
- stvx v28,r7,$sp
- addi r7,r7,32
- stvx v29,r3,$sp
- addi r3,r3,32
- stvx v30,r7,$sp
- stvx v31,r3,$sp
- li r0,-1
- stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
- li $x10,0x10
- $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
- li $x20,0x20
- $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
- li $x30,0x30
- $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
- li $x40,0x40
- $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
- li $x50,0x50
- $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
- li $x60,0x60
- $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
- li $x70,0x70
- mtspr 256,r0
-
- subi $rounds,$rounds,3 # -4 in total
-
- lvx $rndkey0,$x00,$key1 # load key schedule
- lvx v30,$x10,$key1
- addi $key1,$key1,0x20
- lvx v31,$x00,$key1
- ?vperm $rndkey0,$rndkey0,v30,$keyperm
- addi $key_,$sp,`$FRAME+15`
- mtctr $rounds
-
-Load_xts_dec_key:
- ?vperm v24,v30,v31,$keyperm
- lvx v30,$x10,$key1
- addi $key1,$key1,0x20
- stvx v24,$x00,$key_ # off-load round[1]
- ?vperm v25,v31,v30,$keyperm
- lvx v31,$x00,$key1
- stvx v25,$x10,$key_ # off-load round[2]
- addi $key_,$key_,0x20
- bdnz Load_xts_dec_key
-
- lvx v26,$x10,$key1
- ?vperm v24,v30,v31,$keyperm
- lvx v27,$x20,$key1
- stvx v24,$x00,$key_ # off-load round[3]
- ?vperm v25,v31,v26,$keyperm
- lvx v28,$x30,$key1
- stvx v25,$x10,$key_ # off-load round[4]
- addi $key_,$sp,`$FRAME+15` # rewind $key_
- ?vperm v26,v26,v27,$keyperm
- lvx v29,$x40,$key1
- ?vperm v27,v27,v28,$keyperm
- lvx v30,$x50,$key1
- ?vperm v28,v28,v29,$keyperm
- lvx v31,$x60,$key1
- ?vperm v29,v29,v30,$keyperm
- lvx $twk5,$x70,$key1 # borrow $twk5
- ?vperm v30,v30,v31,$keyperm
- lvx v24,$x00,$key_ # pre-load round[1]
- ?vperm v31,v31,$twk5,$keyperm
- lvx v25,$x10,$key_ # pre-load round[2]
-
- vperm $in0,$inout,$inptail,$inpperm
- subi $inp,$inp,31 # undo "caller"
- vxor $twk0,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- vand $tmp,$tmp,$eighty7
- vxor $out0,$in0,$twk0
- vxor $tweak,$tweak,$tmp
-
- lvx_u $in1,$x10,$inp
- vxor $twk1,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- le?vperm $in1,$in1,$in1,$leperm
- vand $tmp,$tmp,$eighty7
- vxor $out1,$in1,$twk1
- vxor $tweak,$tweak,$tmp
-
- lvx_u $in2,$x20,$inp
- andi. $taillen,$len,15
- vxor $twk2,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- le?vperm $in2,$in2,$in2,$leperm
- vand $tmp,$tmp,$eighty7
- vxor $out2,$in2,$twk2
- vxor $tweak,$tweak,$tmp
-
- lvx_u $in3,$x30,$inp
- sub $len,$len,$taillen
- vxor $twk3,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- le?vperm $in3,$in3,$in3,$leperm
- vand $tmp,$tmp,$eighty7
- vxor $out3,$in3,$twk3
- vxor $tweak,$tweak,$tmp
-
- lvx_u $in4,$x40,$inp
- subi $len,$len,0x60
- vxor $twk4,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- le?vperm $in4,$in4,$in4,$leperm
- vand $tmp,$tmp,$eighty7
- vxor $out4,$in4,$twk4
- vxor $tweak,$tweak,$tmp
-
- lvx_u $in5,$x50,$inp
- addi $inp,$inp,0x60
- vxor $twk5,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- le?vperm $in5,$in5,$in5,$leperm
- vand $tmp,$tmp,$eighty7
- vxor $out5,$in5,$twk5
- vxor $tweak,$tweak,$tmp
-
- vxor v31,v31,$rndkey0
- mtctr $rounds
- b Loop_xts_dec6x
-
-.align 5
-Loop_xts_dec6x:
- vncipher $out0,$out0,v24
- vncipher $out1,$out1,v24
- vncipher $out2,$out2,v24
- vncipher $out3,$out3,v24
- vncipher $out4,$out4,v24
- vncipher $out5,$out5,v24
- lvx v24,$x20,$key_ # round[3]
- addi $key_,$key_,0x20
-
- vncipher $out0,$out0,v25
- vncipher $out1,$out1,v25
- vncipher $out2,$out2,v25
- vncipher $out3,$out3,v25
- vncipher $out4,$out4,v25
- vncipher $out5,$out5,v25
- lvx v25,$x10,$key_ # round[4]
- bdnz Loop_xts_dec6x
-
- subic $len,$len,96 # $len-=96
- vxor $in0,$twk0,v31 # xor with last round key
- vncipher $out0,$out0,v24
- vncipher $out1,$out1,v24
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk0,$tweak,$rndkey0
- vaddubm $tweak,$tweak,$tweak
- vncipher $out2,$out2,v24
- vncipher $out3,$out3,v24
- vsldoi $tmp,$tmp,$tmp,15
- vncipher $out4,$out4,v24
- vncipher $out5,$out5,v24
-
- subfe. r0,r0,r0 # borrow?-1:0
- vand $tmp,$tmp,$eighty7
- vncipher $out0,$out0,v25
- vncipher $out1,$out1,v25
- vxor $tweak,$tweak,$tmp
- vncipher $out2,$out2,v25
- vncipher $out3,$out3,v25
- vxor $in1,$twk1,v31
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk1,$tweak,$rndkey0
- vncipher $out4,$out4,v25
- vncipher $out5,$out5,v25
-
- and r0,r0,$len
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- vncipher $out0,$out0,v26
- vncipher $out1,$out1,v26
- vand $tmp,$tmp,$eighty7
- vncipher $out2,$out2,v26
- vncipher $out3,$out3,v26
- vxor $tweak,$tweak,$tmp
- vncipher $out4,$out4,v26
- vncipher $out5,$out5,v26
-
- add $inp,$inp,r0 # $inp is adjusted in such
- # way that at exit from the
- # loop inX-in5 are loaded
- # with last "words"
- vxor $in2,$twk2,v31
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk2,$tweak,$rndkey0
- vaddubm $tweak,$tweak,$tweak
- vncipher $out0,$out0,v27
- vncipher $out1,$out1,v27
- vsldoi $tmp,$tmp,$tmp,15
- vncipher $out2,$out2,v27
- vncipher $out3,$out3,v27
- vand $tmp,$tmp,$eighty7
- vncipher $out4,$out4,v27
- vncipher $out5,$out5,v27
-
- addi $key_,$sp,`$FRAME+15` # rewind $key_
- vxor $tweak,$tweak,$tmp
- vncipher $out0,$out0,v28
- vncipher $out1,$out1,v28
- vxor $in3,$twk3,v31
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk3,$tweak,$rndkey0
- vncipher $out2,$out2,v28
- vncipher $out3,$out3,v28
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- vncipher $out4,$out4,v28
- vncipher $out5,$out5,v28
- lvx v24,$x00,$key_ # re-pre-load round[1]
- vand $tmp,$tmp,$eighty7
-
- vncipher $out0,$out0,v29
- vncipher $out1,$out1,v29
- vxor $tweak,$tweak,$tmp
- vncipher $out2,$out2,v29
- vncipher $out3,$out3,v29
- vxor $in4,$twk4,v31
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk4,$tweak,$rndkey0
- vncipher $out4,$out4,v29
- vncipher $out5,$out5,v29
- lvx v25,$x10,$key_ # re-pre-load round[2]
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
-
- vncipher $out0,$out0,v30
- vncipher $out1,$out1,v30
- vand $tmp,$tmp,$eighty7
- vncipher $out2,$out2,v30
- vncipher $out3,$out3,v30
- vxor $tweak,$tweak,$tmp
- vncipher $out4,$out4,v30
- vncipher $out5,$out5,v30
- vxor $in5,$twk5,v31
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk5,$tweak,$rndkey0
-
- vncipherlast $out0,$out0,$in0
- lvx_u $in0,$x00,$inp # load next input block
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- vncipherlast $out1,$out1,$in1
- lvx_u $in1,$x10,$inp
- vncipherlast $out2,$out2,$in2
- le?vperm $in0,$in0,$in0,$leperm
- lvx_u $in2,$x20,$inp
- vand $tmp,$tmp,$eighty7
- vncipherlast $out3,$out3,$in3
- le?vperm $in1,$in1,$in1,$leperm
- lvx_u $in3,$x30,$inp
- vncipherlast $out4,$out4,$in4
- le?vperm $in2,$in2,$in2,$leperm
- lvx_u $in4,$x40,$inp
- vxor $tweak,$tweak,$tmp
- vncipherlast $out5,$out5,$in5
- le?vperm $in3,$in3,$in3,$leperm
- lvx_u $in5,$x50,$inp
- addi $inp,$inp,0x60
- le?vperm $in4,$in4,$in4,$leperm
- le?vperm $in5,$in5,$in5,$leperm
-
- le?vperm $out0,$out0,$out0,$leperm
- le?vperm $out1,$out1,$out1,$leperm
- stvx_u $out0,$x00,$out # store output
- vxor $out0,$in0,$twk0
- le?vperm $out2,$out2,$out2,$leperm
- stvx_u $out1,$x10,$out
- vxor $out1,$in1,$twk1
- le?vperm $out3,$out3,$out3,$leperm
- stvx_u $out2,$x20,$out
- vxor $out2,$in2,$twk2
- le?vperm $out4,$out4,$out4,$leperm
- stvx_u $out3,$x30,$out
- vxor $out3,$in3,$twk3
- le?vperm $out5,$out5,$out5,$leperm
- stvx_u $out4,$x40,$out
- vxor $out4,$in4,$twk4
- stvx_u $out5,$x50,$out
- vxor $out5,$in5,$twk5
- addi $out,$out,0x60
-
- mtctr $rounds
- beq Loop_xts_dec6x # did $len-=96 borrow?
-
- addic. $len,$len,0x60
- beq Lxts_dec6x_zero
- cmpwi $len,0x20
- blt Lxts_dec6x_one
- nop
- beq Lxts_dec6x_two
- cmpwi $len,0x40
- blt Lxts_dec6x_three
- nop
- beq Lxts_dec6x_four
-
-Lxts_dec6x_five:
- vxor $out0,$in1,$twk0
- vxor $out1,$in2,$twk1
- vxor $out2,$in3,$twk2
- vxor $out3,$in4,$twk3
- vxor $out4,$in5,$twk4
-
- bl _aesp8_xts_dec5x
-
- le?vperm $out0,$out0,$out0,$leperm
- vmr $twk0,$twk5 # unused tweak
- vxor $twk1,$tweak,$rndkey0
- le?vperm $out1,$out1,$out1,$leperm
- stvx_u $out0,$x00,$out # store output
- vxor $out0,$in0,$twk1
- le?vperm $out2,$out2,$out2,$leperm
- stvx_u $out1,$x10,$out
- le?vperm $out3,$out3,$out3,$leperm
- stvx_u $out2,$x20,$out
- le?vperm $out4,$out4,$out4,$leperm
- stvx_u $out3,$x30,$out
- stvx_u $out4,$x40,$out
- addi $out,$out,0x50
- bne Lxts_dec6x_steal
- b Lxts_dec6x_done
-
-.align 4
-Lxts_dec6x_four:
- vxor $out0,$in2,$twk0
- vxor $out1,$in3,$twk1
- vxor $out2,$in4,$twk2
- vxor $out3,$in5,$twk3
- vxor $out4,$out4,$out4
-
- bl _aesp8_xts_dec5x
-
- le?vperm $out0,$out0,$out0,$leperm
- vmr $twk0,$twk4 # unused tweak
- vmr $twk1,$twk5
- le?vperm $out1,$out1,$out1,$leperm
- stvx_u $out0,$x00,$out # store output
- vxor $out0,$in0,$twk5
- le?vperm $out2,$out2,$out2,$leperm
- stvx_u $out1,$x10,$out
- le?vperm $out3,$out3,$out3,$leperm
- stvx_u $out2,$x20,$out
- stvx_u $out3,$x30,$out
- addi $out,$out,0x40
- bne Lxts_dec6x_steal
- b Lxts_dec6x_done
-
-.align 4
-Lxts_dec6x_three:
- vxor $out0,$in3,$twk0
- vxor $out1,$in4,$twk1
- vxor $out2,$in5,$twk2
- vxor $out3,$out3,$out3
- vxor $out4,$out4,$out4
-
- bl _aesp8_xts_dec5x
-
- le?vperm $out0,$out0,$out0,$leperm
- vmr $twk0,$twk3 # unused tweak
- vmr $twk1,$twk4
- le?vperm $out1,$out1,$out1,$leperm
- stvx_u $out0,$x00,$out # store output
- vxor $out0,$in0,$twk4
- le?vperm $out2,$out2,$out2,$leperm
- stvx_u $out1,$x10,$out
- stvx_u $out2,$x20,$out
- addi $out,$out,0x30
- bne Lxts_dec6x_steal
- b Lxts_dec6x_done
-
-.align 4
-Lxts_dec6x_two:
- vxor $out0,$in4,$twk0
- vxor $out1,$in5,$twk1
- vxor $out2,$out2,$out2
- vxor $out3,$out3,$out3
- vxor $out4,$out4,$out4
-
- bl _aesp8_xts_dec5x
-
- le?vperm $out0,$out0,$out0,$leperm
- vmr $twk0,$twk2 # unused tweak
- vmr $twk1,$twk3
- le?vperm $out1,$out1,$out1,$leperm
- stvx_u $out0,$x00,$out # store output
- vxor $out0,$in0,$twk3
- stvx_u $out1,$x10,$out
- addi $out,$out,0x20
- bne Lxts_dec6x_steal
- b Lxts_dec6x_done
-
-.align 4
-Lxts_dec6x_one:
- vxor $out0,$in5,$twk0
- nop
-Loop_xts_dec1x:
- vncipher $out0,$out0,v24
- lvx v24,$x20,$key_ # round[3]
- addi $key_,$key_,0x20
-
- vncipher $out0,$out0,v25
- lvx v25,$x10,$key_ # round[4]
- bdnz Loop_xts_dec1x
-
- subi r0,$taillen,1
- vncipher $out0,$out0,v24
-
- andi. r0,r0,16
- cmpwi $taillen,0
- vncipher $out0,$out0,v25
-
- sub $inp,$inp,r0
- vncipher $out0,$out0,v26
-
- lvx_u $in0,0,$inp
- vncipher $out0,$out0,v27
-
- addi $key_,$sp,`$FRAME+15` # rewind $key_
- vncipher $out0,$out0,v28
- lvx v24,$x00,$key_ # re-pre-load round[1]
-
- vncipher $out0,$out0,v29
- lvx v25,$x10,$key_ # re-pre-load round[2]
- vxor $twk0,$twk0,v31
-
- le?vperm $in0,$in0,$in0,$leperm
- vncipher $out0,$out0,v30
-
- mtctr $rounds
- vncipherlast $out0,$out0,$twk0
-
- vmr $twk0,$twk1 # unused tweak
- vmr $twk1,$twk2
- le?vperm $out0,$out0,$out0,$leperm
- stvx_u $out0,$x00,$out # store output
- addi $out,$out,0x10
- vxor $out0,$in0,$twk2
- bne Lxts_dec6x_steal
- b Lxts_dec6x_done
-
-.align 4
-Lxts_dec6x_zero:
- cmpwi $taillen,0
- beq Lxts_dec6x_done
-
- lvx_u $in0,0,$inp
- le?vperm $in0,$in0,$in0,$leperm
- vxor $out0,$in0,$twk1
-Lxts_dec6x_steal:
- vncipher $out0,$out0,v24
- lvx v24,$x20,$key_ # round[3]
- addi $key_,$key_,0x20
-
- vncipher $out0,$out0,v25
- lvx v25,$x10,$key_ # round[4]
- bdnz Lxts_dec6x_steal
-
- add $inp,$inp,$taillen
- vncipher $out0,$out0,v24
-
- cmpwi $taillen,0
- vncipher $out0,$out0,v25
-
- lvx_u $in0,0,$inp
- vncipher $out0,$out0,v26
-
- lvsr $inpperm,0,$taillen # $in5 is no more
- vncipher $out0,$out0,v27
-
- addi $key_,$sp,`$FRAME+15` # rewind $key_
- vncipher $out0,$out0,v28
- lvx v24,$x00,$key_ # re-pre-load round[1]
-
- vncipher $out0,$out0,v29
- lvx v25,$x10,$key_ # re-pre-load round[2]
- vxor $twk1,$twk1,v31
-
- le?vperm $in0,$in0,$in0,$leperm
- vncipher $out0,$out0,v30
-
- vperm $in0,$in0,$in0,$inpperm
- vncipherlast $tmp,$out0,$twk1
-
- le?vperm $out0,$tmp,$tmp,$leperm
- le?stvx_u $out0,0,$out
- be?stvx_u $tmp,0,$out
-
- vxor $out0,$out0,$out0
- vspltisb $out1,-1
- vperm $out0,$out0,$out1,$inpperm
- vsel $out0,$in0,$tmp,$out0
- vxor $out0,$out0,$twk0
-
- subi r30,$out,1
- mtctr $taillen
-Loop_xts_dec6x_steal:
- lbzu r0,1(r30)
- stb r0,16(r30)
- bdnz Loop_xts_dec6x_steal
-
- li $taillen,0
- mtctr $rounds
- b Loop_xts_dec1x # one more time...
-
-.align 4
-Lxts_dec6x_done:
- ${UCMP}i $ivp,0
- beq Lxts_dec6x_ret
-
- vxor $tweak,$twk0,$rndkey0
- le?vperm $tweak,$tweak,$tweak,$leperm
- stvx_u $tweak,0,$ivp
-
-Lxts_dec6x_ret:
- mtlr r11
- li r10,`$FRAME+15`
- li r11,`$FRAME+31`
- stvx $seven,r10,$sp # wipe copies of round keys
- addi r10,r10,32
- stvx $seven,r11,$sp
- addi r11,r11,32
- stvx $seven,r10,$sp
- addi r10,r10,32
- stvx $seven,r11,$sp
- addi r11,r11,32
- stvx $seven,r10,$sp
- addi r10,r10,32
- stvx $seven,r11,$sp
- addi r11,r11,32
- stvx $seven,r10,$sp
- addi r10,r10,32
- stvx $seven,r11,$sp
- addi r11,r11,32
-
- mtspr 256,$vrsave
- lvx v20,r10,$sp # ABI says so
- addi r10,r10,32
- lvx v21,r11,$sp
- addi r11,r11,32
- lvx v22,r10,$sp
- addi r10,r10,32
- lvx v23,r11,$sp
- addi r11,r11,32
- lvx v24,r10,$sp
- addi r10,r10,32
- lvx v25,r11,$sp
- addi r11,r11,32
- lvx v26,r10,$sp
- addi r10,r10,32
- lvx v27,r11,$sp
- addi r11,r11,32
- lvx v28,r10,$sp
- addi r10,r10,32
- lvx v29,r11,$sp
- addi r11,r11,32
- lvx v30,r10,$sp
- lvx v31,r11,$sp
- $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
- $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
- $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
- $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
- $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
- $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
- addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
- blr
- .long 0
- .byte 0,12,0x04,1,0x80,6,6,0
- .long 0
-
-.align 5
-_aesp8_xts_dec5x:
- vncipher $out0,$out0,v24
- vncipher $out1,$out1,v24
- vncipher $out2,$out2,v24
- vncipher $out3,$out3,v24
- vncipher $out4,$out4,v24
- lvx v24,$x20,$key_ # round[3]
- addi $key_,$key_,0x20
-
- vncipher $out0,$out0,v25
- vncipher $out1,$out1,v25
- vncipher $out2,$out2,v25
- vncipher $out3,$out3,v25
- vncipher $out4,$out4,v25
- lvx v25,$x10,$key_ # round[4]
- bdnz _aesp8_xts_dec5x
-
- subi r0,$taillen,1
- vncipher $out0,$out0,v24
- vncipher $out1,$out1,v24
- vncipher $out2,$out2,v24
- vncipher $out3,$out3,v24
- vncipher $out4,$out4,v24
-
- andi. r0,r0,16
- cmpwi $taillen,0
- vncipher $out0,$out0,v25
- vncipher $out1,$out1,v25
- vncipher $out2,$out2,v25
- vncipher $out3,$out3,v25
- vncipher $out4,$out4,v25
- vxor $twk0,$twk0,v31
-
- sub $inp,$inp,r0
- vncipher $out0,$out0,v26
- vncipher $out1,$out1,v26
- vncipher $out2,$out2,v26
- vncipher $out3,$out3,v26
- vncipher $out4,$out4,v26
- vxor $in1,$twk1,v31
-
- vncipher $out0,$out0,v27
- lvx_u $in0,0,$inp
- vncipher $out1,$out1,v27
- vncipher $out2,$out2,v27
- vncipher $out3,$out3,v27
- vncipher $out4,$out4,v27
- vxor $in2,$twk2,v31
-
- addi $key_,$sp,`$FRAME+15` # rewind $key_
- vncipher $out0,$out0,v28
- vncipher $out1,$out1,v28
- vncipher $out2,$out2,v28
- vncipher $out3,$out3,v28
- vncipher $out4,$out4,v28
- lvx v24,$x00,$key_ # re-pre-load round[1]
- vxor $in3,$twk3,v31
-
- vncipher $out0,$out0,v29
- le?vperm $in0,$in0,$in0,$leperm
- vncipher $out1,$out1,v29
- vncipher $out2,$out2,v29
- vncipher $out3,$out3,v29
- vncipher $out4,$out4,v29
- lvx v25,$x10,$key_ # re-pre-load round[2]
- vxor $in4,$twk4,v31
-
- vncipher $out0,$out0,v30
- vncipher $out1,$out1,v30
- vncipher $out2,$out2,v30
- vncipher $out3,$out3,v30
- vncipher $out4,$out4,v30
-
- vncipherlast $out0,$out0,$twk0
- vncipherlast $out1,$out1,$in1
- vncipherlast $out2,$out2,$in2
- vncipherlast $out3,$out3,$in3
- vncipherlast $out4,$out4,$in4
- mtctr $rounds
- blr
- .long 0
- .byte 0,12,0x14,0,0,0,0,0
-___
-}} }}}
-
-my $consts=1;
-foreach(split("\n",$code)) {
- s/\`([^\`]*)\`/eval($1)/geo;
-
- # constants table endian-specific conversion
- if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
- my $conv=$3;
- my @bytes=();
-
- # convert to endian-agnostic format
- if ($1 eq "long") {
- foreach (split(/,\s*/,$2)) {
- my $l = /^0/?oct:int;
- push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
- }
- } else {
- @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
- }
-
- # little-endian conversion
- if ($flavour =~ /le$/o) {
- SWITCH: for($conv) {
- /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
- /\?rev/ && do { @bytes=reverse(@bytes); last; };
- }
- }
-
- #emit
- print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
- next;
- }
- $consts=0 if (m/Lconsts:/o); # end of table
-
- # instructions prefixed with '?' are endian-specific and need
- # to be adjusted accordingly...
- if ($flavour =~ /le$/o) { # little-endian
- s/le\?//o or
- s/be\?/#be#/o or
- s/\?lvsr/lvsl/o or
- s/\?lvsl/lvsr/o or
- s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
- s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
- s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
- } else { # big-endian
- s/le\?/#le#/o or
- s/be\?//o or
- s/\?([a-z]+)/$1/o;
- }
-
- print $_,"\n";
-}
-
-close STDOUT or die "error closing STDOUT: $!";
diff --git a/crypto/fipsmodule/aes/internal.h b/crypto/fipsmodule/aes/internal.h
index 0685bc4..98b2a14d 100644
--- a/crypto/fipsmodule/aes/internal.h
+++ b/crypto/fipsmodule/aes/internal.h
@@ -59,12 +59,6 @@
OPENSSL_INLINE int vpaes_capable(void) { return CRYPTO_is_NEON_capable(); }
#endif
-#elif defined(OPENSSL_PPC64LE)
-#define HWAES
-
-OPENSSL_INLINE int hwaes_capable(void) {
- return CRYPTO_is_PPC64LE_vcrypto_capable();
-}
#endif
#endif // !NO_ASM
diff --git a/crypto/fipsmodule/bcm.c b/crypto/fipsmodule/bcm.c
index 1792134..e2e4d90 100644
--- a/crypto/fipsmodule/bcm.c
+++ b/crypto/fipsmodule/bcm.c
@@ -101,7 +101,6 @@
#include "self_check/fips.c"
#include "self_check/self_check.c"
#include "service_indicator/service_indicator.c"
-#include "sha/sha1-altivec.c"
#include "sha/sha1.c"
#include "sha/sha256.c"
#include "sha/sha512.c"
diff --git a/crypto/fipsmodule/bn/bn.c b/crypto/fipsmodule/bn/bn.c
index f3fbb7a..93fae56 100644
--- a/crypto/fipsmodule/bn/bn.c
+++ b/crypto/fipsmodule/bn/bn.c
@@ -386,23 +386,6 @@
}
int bn_resize_words(BIGNUM *bn, size_t words) {
-#if defined(OPENSSL_PPC64LE)
- // This is a workaround for a miscompilation bug in Clang 7.0.1 on POWER.
- // The unittests catch the miscompilation, if it occurs, and it manifests
- // as a crash in |bn_fits_in_words|.
- //
- // The bug only triggers if building in FIPS mode and with -O3. Clang 8.0.1
- // has the same bug but this workaround is not effective there---I've not
- // been able to find a workaround for 8.0.1.
- //
- // At the time of writing (2019-08-08), Clang git does *not* have this bug
- // and does not need this workaroud. The current git version should go on to
- // be Clang 10 thus, once we can depend on that, this can be removed.
- if (value_barrier_w((size_t)bn->width == words)) {
- return 1;
- }
-#endif
-
if ((size_t)bn->width <= words) {
if (!bn_wexpand(bn, words)) {
return 0;
diff --git a/crypto/fipsmodule/cipher/e_aes.c b/crypto/fipsmodule/cipher/e_aes.c
index e8e03fe..0db77b8 100644
--- a/crypto/fipsmodule/cipher/e_aes.c
+++ b/crypto/fipsmodule/cipher/e_aes.c
@@ -1468,8 +1468,6 @@
return hwaes_capable() && crypto_gcm_clmul_enabled();
#elif defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)
return hwaes_capable() && CRYPTO_is_ARMv8_PMULL_capable();
-#elif defined(OPENSSL_PPC64LE)
- return CRYPTO_is_PPC64LE_vcrypto_capable();
#else
return 0;
#endif
diff --git a/crypto/fipsmodule/modes/asm/ghashp8-ppc.pl b/crypto/fipsmodule/modes/asm/ghashp8-ppc.pl
deleted file mode 100644
index 0d12a77..0000000
--- a/crypto/fipsmodule/modes/asm/ghashp8-ppc.pl
+++ /dev/null
@@ -1,671 +0,0 @@
-#! /usr/bin/env perl
-# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
-#
-# Licensed under the OpenSSL license (the "License"). You may not use
-# this file except in compliance with the License. You can obtain a copy
-# in the file LICENSE in the source distribution or at
-# https://www.openssl.org/source/license.html
-
-#
-# ====================================================================
-# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-#
-# GHASH for for PowerISA v2.07.
-#
-# July 2014
-#
-# Accurate performance measurements are problematic, because it's
-# always virtualized setup with possibly throttled processor.
-# Relative comparison is therefore more informative. This initial
-# version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x
-# faster than "4-bit" integer-only compiler-generated 64-bit code.
-# "Initial version" means that there is room for futher improvement.
-
-# May 2016
-#
-# 2x aggregated reduction improves performance by 50% (resulting
-# performance on POWER8 is 1 cycle per processed byte), and 4x
-# aggregated reduction - by 170% or 2.7x (resulting in 0.55 cpb).
-
-$flavour=shift;
-$output =shift;
-
-if ($flavour =~ /64/) {
- $SIZE_T=8;
- $LRSAVE=2*$SIZE_T;
- $STU="stdu";
- $POP="ld";
- $PUSH="std";
- $UCMP="cmpld";
- $SHRI="srdi";
-} elsif ($flavour =~ /32/) {
- $SIZE_T=4;
- $LRSAVE=$SIZE_T;
- $STU="stwu";
- $POP="lwz";
- $PUSH="stw";
- $UCMP="cmplw";
- $SHRI="srwi";
-} else { die "nonsense $flavour"; }
-
-$sp="r1";
-$FRAME=6*$SIZE_T+13*16; # 13*16 is for v20-v31 offload
-
-$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
-( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
-( $xlate="${dir}../../../perlasm/ppc-xlate.pl" and -f $xlate) or
-die "can't locate ppc-xlate.pl";
-
-open OUT,"| $^X \"$xlate\" $flavour \"$output\"" || die "can't call $xlate: $!";
-*STDOUT=*OUT;
-
-my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6)); # argument block
-
-my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3));
-my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12));
-my ($Xl1,$Xm1,$Xh1,$IN1,$H2,$H2h,$H2l)=map("v$_",(13..19));
-my $vrsave="r12";
-
-$code=<<___;
-.machine "any"
-
-.text
-
-.globl .gcm_init_p8
-.align 5
-.gcm_init_p8:
- li r0,-4096
- li r8,0x10
- mfspr $vrsave,256
- li r9,0x20
- mtspr 256,r0
- li r10,0x30
- lvx_u $H,0,r4 # load H
-
- vspltisb $xC2,-16 # 0xf0
- vspltisb $t0,1 # one
- vaddubm $xC2,$xC2,$xC2 # 0xe0
- vxor $zero,$zero,$zero
- vor $xC2,$xC2,$t0 # 0xe1
- vsldoi $xC2,$xC2,$zero,15 # 0xe1...
- vsldoi $t1,$zero,$t0,1 # ...1
- vaddubm $xC2,$xC2,$xC2 # 0xc2...
- vspltisb $t2,7
- vor $xC2,$xC2,$t1 # 0xc2....01
- vspltb $t1,$H,0 # most significant byte
- vsl $H,$H,$t0 # H<<=1
- vsrab $t1,$t1,$t2 # broadcast carry bit
- vand $t1,$t1,$xC2
- vxor $IN,$H,$t1 # twisted H
-
- vsldoi $H,$IN,$IN,8 # twist even more ...
- vsldoi $xC2,$zero,$xC2,8 # 0xc2.0
- vsldoi $Hl,$zero,$H,8 # ... and split
- vsldoi $Hh,$H,$zero,8
-
- stvx_u $xC2,0,r3 # save pre-computed table
- stvx_u $Hl,r8,r3
- li r8,0x40
- stvx_u $H, r9,r3
- li r9,0x50
- stvx_u $Hh,r10,r3
- li r10,0x60
-
- vpmsumd $Xl,$IN,$Hl # H.lo·H.lo
- vpmsumd $Xm,$IN,$H # H.hi·H.lo+H.lo·H.hi
- vpmsumd $Xh,$IN,$Hh # H.hi·H.hi
-
- vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
-
- vsldoi $t0,$Xm,$zero,8
- vsldoi $t1,$zero,$Xm,8
- vxor $Xl,$Xl,$t0
- vxor $Xh,$Xh,$t1
-
- vsldoi $Xl,$Xl,$Xl,8
- vxor $Xl,$Xl,$t2
-
- vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
- vpmsumd $Xl,$Xl,$xC2
- vxor $t1,$t1,$Xh
- vxor $IN1,$Xl,$t1
-
- vsldoi $H2,$IN1,$IN1,8
- vsldoi $H2l,$zero,$H2,8
- vsldoi $H2h,$H2,$zero,8
-
- stvx_u $H2l,r8,r3 # save H^2
- li r8,0x70
- stvx_u $H2,r9,r3
- li r9,0x80
- stvx_u $H2h,r10,r3
- li r10,0x90
-___
-{
-my ($t4,$t5,$t6) = ($Hl,$H,$Hh);
-$code.=<<___;
- vpmsumd $Xl,$IN,$H2l # H.lo·H^2.lo
- vpmsumd $Xl1,$IN1,$H2l # H^2.lo·H^2.lo
- vpmsumd $Xm,$IN,$H2 # H.hi·H^2.lo+H.lo·H^2.hi
- vpmsumd $Xm1,$IN1,$H2 # H^2.hi·H^2.lo+H^2.lo·H^2.hi
- vpmsumd $Xh,$IN,$H2h # H.hi·H^2.hi
- vpmsumd $Xh1,$IN1,$H2h # H^2.hi·H^2.hi
-
- vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
- vpmsumd $t6,$Xl1,$xC2 # 1st reduction phase
-
- vsldoi $t0,$Xm,$zero,8
- vsldoi $t1,$zero,$Xm,8
- vsldoi $t4,$Xm1,$zero,8
- vsldoi $t5,$zero,$Xm1,8
- vxor $Xl,$Xl,$t0
- vxor $Xh,$Xh,$t1
- vxor $Xl1,$Xl1,$t4
- vxor $Xh1,$Xh1,$t5
-
- vsldoi $Xl,$Xl,$Xl,8
- vsldoi $Xl1,$Xl1,$Xl1,8
- vxor $Xl,$Xl,$t2
- vxor $Xl1,$Xl1,$t6
-
- vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
- vsldoi $t5,$Xl1,$Xl1,8 # 2nd reduction phase
- vpmsumd $Xl,$Xl,$xC2
- vpmsumd $Xl1,$Xl1,$xC2
- vxor $t1,$t1,$Xh
- vxor $t5,$t5,$Xh1
- vxor $Xl,$Xl,$t1
- vxor $Xl1,$Xl1,$t5
-
- vsldoi $H,$Xl,$Xl,8
- vsldoi $H2,$Xl1,$Xl1,8
- vsldoi $Hl,$zero,$H,8
- vsldoi $Hh,$H,$zero,8
- vsldoi $H2l,$zero,$H2,8
- vsldoi $H2h,$H2,$zero,8
-
- stvx_u $Hl,r8,r3 # save H^3
- li r8,0xa0
- stvx_u $H,r9,r3
- li r9,0xb0
- stvx_u $Hh,r10,r3
- li r10,0xc0
- stvx_u $H2l,r8,r3 # save H^4
- stvx_u $H2,r9,r3
- stvx_u $H2h,r10,r3
-
- mtspr 256,$vrsave
- blr
- .long 0
- .byte 0,12,0x14,0,0,0,2,0
- .long 0
-.size .gcm_init_p8,.-.gcm_init_p8
-___
-}
-$code.=<<___;
-.globl .gcm_gmult_p8
-.align 5
-.gcm_gmult_p8:
- lis r0,0xfff8
- li r8,0x10
- mfspr $vrsave,256
- li r9,0x20
- mtspr 256,r0
- li r10,0x30
- lvx_u $IN,0,$Xip # load Xi
-
- lvx_u $Hl,r8,$Htbl # load pre-computed table
- le?lvsl $lemask,r0,r0
- lvx_u $H, r9,$Htbl
- le?vspltisb $t0,0x07
- lvx_u $Hh,r10,$Htbl
- le?vxor $lemask,$lemask,$t0
- lvx_u $xC2,0,$Htbl
- le?vperm $IN,$IN,$IN,$lemask
- vxor $zero,$zero,$zero
-
- vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
- vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
- vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
-
- vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
-
- vsldoi $t0,$Xm,$zero,8
- vsldoi $t1,$zero,$Xm,8
- vxor $Xl,$Xl,$t0
- vxor $Xh,$Xh,$t1
-
- vsldoi $Xl,$Xl,$Xl,8
- vxor $Xl,$Xl,$t2
-
- vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
- vpmsumd $Xl,$Xl,$xC2
- vxor $t1,$t1,$Xh
- vxor $Xl,$Xl,$t1
-
- le?vperm $Xl,$Xl,$Xl,$lemask
- stvx_u $Xl,0,$Xip # write out Xi
-
- mtspr 256,$vrsave
- blr
- .long 0
- .byte 0,12,0x14,0,0,0,2,0
- .long 0
-.size .gcm_gmult_p8,.-.gcm_gmult_p8
-
-.globl .gcm_ghash_p8
-.align 5
-.gcm_ghash_p8:
- li r0,-4096
- li r8,0x10
- mfspr $vrsave,256
- li r9,0x20
- mtspr 256,r0
- li r10,0x30
- lvx_u $Xl,0,$Xip # load Xi
-
- lvx_u $Hl,r8,$Htbl # load pre-computed table
- li r8,0x40
- le?lvsl $lemask,r0,r0
- lvx_u $H, r9,$Htbl
- li r9,0x50
- le?vspltisb $t0,0x07
- lvx_u $Hh,r10,$Htbl
- li r10,0x60
- le?vxor $lemask,$lemask,$t0
- lvx_u $xC2,0,$Htbl
- le?vperm $Xl,$Xl,$Xl,$lemask
- vxor $zero,$zero,$zero
-
- ${UCMP}i $len,64
- bge Lgcm_ghash_p8_4x
-
- lvx_u $IN,0,$inp
- addi $inp,$inp,16
- subic. $len,$len,16
- le?vperm $IN,$IN,$IN,$lemask
- vxor $IN,$IN,$Xl
- beq Lshort
-
- lvx_u $H2l,r8,$Htbl # load H^2
- li r8,16
- lvx_u $H2, r9,$Htbl
- add r9,$inp,$len # end of input
- lvx_u $H2h,r10,$Htbl
- be?b Loop_2x
-
-.align 5
-Loop_2x:
- lvx_u $IN1,0,$inp
- le?vperm $IN1,$IN1,$IN1,$lemask
-
- subic $len,$len,32
- vpmsumd $Xl,$IN,$H2l # H^2.lo·Xi.lo
- vpmsumd $Xl1,$IN1,$Hl # H.lo·Xi+1.lo
- subfe r0,r0,r0 # borrow?-1:0
- vpmsumd $Xm,$IN,$H2 # H^2.hi·Xi.lo+H^2.lo·Xi.hi
- vpmsumd $Xm1,$IN1,$H # H.hi·Xi+1.lo+H.lo·Xi+1.hi
- and r0,r0,$len
- vpmsumd $Xh,$IN,$H2h # H^2.hi·Xi.hi
- vpmsumd $Xh1,$IN1,$Hh # H.hi·Xi+1.hi
- add $inp,$inp,r0
-
- vxor $Xl,$Xl,$Xl1
- vxor $Xm,$Xm,$Xm1
-
- vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
-
- vsldoi $t0,$Xm,$zero,8
- vsldoi $t1,$zero,$Xm,8
- vxor $Xh,$Xh,$Xh1
- vxor $Xl,$Xl,$t0
- vxor $Xh,$Xh,$t1
-
- vsldoi $Xl,$Xl,$Xl,8
- vxor $Xl,$Xl,$t2
- lvx_u $IN,r8,$inp
- addi $inp,$inp,32
-
- vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
- vpmsumd $Xl,$Xl,$xC2
- le?vperm $IN,$IN,$IN,$lemask
- vxor $t1,$t1,$Xh
- vxor $IN,$IN,$t1
- vxor $IN,$IN,$Xl
- $UCMP r9,$inp
- bgt Loop_2x # done yet?
-
- cmplwi $len,0
- bne Leven
-
-Lshort:
- vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
- vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
- vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
-
- vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
-
- vsldoi $t0,$Xm,$zero,8
- vsldoi $t1,$zero,$Xm,8
- vxor $Xl,$Xl,$t0
- vxor $Xh,$Xh,$t1
-
- vsldoi $Xl,$Xl,$Xl,8
- vxor $Xl,$Xl,$t2
-
- vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
- vpmsumd $Xl,$Xl,$xC2
- vxor $t1,$t1,$Xh
-
-Leven:
- vxor $Xl,$Xl,$t1
- le?vperm $Xl,$Xl,$Xl,$lemask
- stvx_u $Xl,0,$Xip # write out Xi
-
- mtspr 256,$vrsave
- blr
- .long 0
- .byte 0,12,0x14,0,0,0,4,0
- .long 0
-___
-{
-my ($Xl3,$Xm2,$IN2,$H3l,$H3,$H3h,
- $Xh3,$Xm3,$IN3,$H4l,$H4,$H4h) = map("v$_",(20..31));
-my $IN0=$IN;
-my ($H21l,$H21h,$loperm,$hiperm) = ($Hl,$Hh,$H2l,$H2h);
-
-$code.=<<___;
-.align 5
-.gcm_ghash_p8_4x:
-Lgcm_ghash_p8_4x:
- $STU $sp,-$FRAME($sp)
- li r10,`15+6*$SIZE_T`
- li r11,`31+6*$SIZE_T`
- stvx v20,r10,$sp
- addi r10,r10,32
- stvx v21,r11,$sp
- addi r11,r11,32
- stvx v22,r10,$sp
- addi r10,r10,32
- stvx v23,r11,$sp
- addi r11,r11,32
- stvx v24,r10,$sp
- addi r10,r10,32
- stvx v25,r11,$sp
- addi r11,r11,32
- stvx v26,r10,$sp
- addi r10,r10,32
- stvx v27,r11,$sp
- addi r11,r11,32
- stvx v28,r10,$sp
- addi r10,r10,32
- stvx v29,r11,$sp
- addi r11,r11,32
- stvx v30,r10,$sp
- li r10,0x60
- stvx v31,r11,$sp
- li r0,-1
- stw $vrsave,`$FRAME-4`($sp) # save vrsave
- mtspr 256,r0 # preserve all AltiVec registers
-
- lvsl $t0,0,r8 # 0x0001..0e0f
- #lvx_u $H2l,r8,$Htbl # load H^2
- li r8,0x70
- lvx_u $H2, r9,$Htbl
- li r9,0x80
- vspltisb $t1,8 # 0x0808..0808
- #lvx_u $H2h,r10,$Htbl
- li r10,0x90
- lvx_u $H3l,r8,$Htbl # load H^3
- li r8,0xa0
- lvx_u $H3, r9,$Htbl
- li r9,0xb0
- lvx_u $H3h,r10,$Htbl
- li r10,0xc0
- lvx_u $H4l,r8,$Htbl # load H^4
- li r8,0x10
- lvx_u $H4, r9,$Htbl
- li r9,0x20
- lvx_u $H4h,r10,$Htbl
- li r10,0x30
-
- vsldoi $t2,$zero,$t1,8 # 0x0000..0808
- vaddubm $hiperm,$t0,$t2 # 0x0001..1617
- vaddubm $loperm,$t1,$hiperm # 0x0809..1e1f
-
- $SHRI $len,$len,4 # this allows to use sign bit
- # as carry
- lvx_u $IN0,0,$inp # load input
- lvx_u $IN1,r8,$inp
- subic. $len,$len,8
- lvx_u $IN2,r9,$inp
- lvx_u $IN3,r10,$inp
- addi $inp,$inp,0x40
- le?vperm $IN0,$IN0,$IN0,$lemask
- le?vperm $IN1,$IN1,$IN1,$lemask
- le?vperm $IN2,$IN2,$IN2,$lemask
- le?vperm $IN3,$IN3,$IN3,$lemask
-
- vxor $Xh,$IN0,$Xl
-
- vpmsumd $Xl1,$IN1,$H3l
- vpmsumd $Xm1,$IN1,$H3
- vpmsumd $Xh1,$IN1,$H3h
-
- vperm $H21l,$H2,$H,$hiperm
- vperm $t0,$IN2,$IN3,$loperm
- vperm $H21h,$H2,$H,$loperm
- vperm $t1,$IN2,$IN3,$hiperm
- vpmsumd $Xm2,$IN2,$H2 # H^2.lo·Xi+2.hi+H^2.hi·Xi+2.lo
- vpmsumd $Xl3,$t0,$H21l # H^2.lo·Xi+2.lo+H.lo·Xi+3.lo
- vpmsumd $Xm3,$IN3,$H # H.hi·Xi+3.lo +H.lo·Xi+3.hi
- vpmsumd $Xh3,$t1,$H21h # H^2.hi·Xi+2.hi+H.hi·Xi+3.hi
-
- vxor $Xm2,$Xm2,$Xm1
- vxor $Xl3,$Xl3,$Xl1
- vxor $Xm3,$Xm3,$Xm2
- vxor $Xh3,$Xh3,$Xh1
-
- blt Ltail_4x
-
-Loop_4x:
- lvx_u $IN0,0,$inp
- lvx_u $IN1,r8,$inp
- subic. $len,$len,4
- lvx_u $IN2,r9,$inp
- lvx_u $IN3,r10,$inp
- addi $inp,$inp,0x40
- le?vperm $IN1,$IN1,$IN1,$lemask
- le?vperm $IN2,$IN2,$IN2,$lemask
- le?vperm $IN3,$IN3,$IN3,$lemask
- le?vperm $IN0,$IN0,$IN0,$lemask
-
- vpmsumd $Xl,$Xh,$H4l # H^4.lo·Xi.lo
- vpmsumd $Xm,$Xh,$H4 # H^4.hi·Xi.lo+H^4.lo·Xi.hi
- vpmsumd $Xh,$Xh,$H4h # H^4.hi·Xi.hi
- vpmsumd $Xl1,$IN1,$H3l
- vpmsumd $Xm1,$IN1,$H3
- vpmsumd $Xh1,$IN1,$H3h
-
- vxor $Xl,$Xl,$Xl3
- vxor $Xm,$Xm,$Xm3
- vxor $Xh,$Xh,$Xh3
- vperm $t0,$IN2,$IN3,$loperm
- vperm $t1,$IN2,$IN3,$hiperm
-
- vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
- vpmsumd $Xl3,$t0,$H21l # H.lo·Xi+3.lo +H^2.lo·Xi+2.lo
- vpmsumd $Xh3,$t1,$H21h # H.hi·Xi+3.hi +H^2.hi·Xi+2.hi
-
- vsldoi $t0,$Xm,$zero,8
- vsldoi $t1,$zero,$Xm,8
- vxor $Xl,$Xl,$t0
- vxor $Xh,$Xh,$t1
-
- vsldoi $Xl,$Xl,$Xl,8
- vxor $Xl,$Xl,$t2
-
- vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
- vpmsumd $Xm2,$IN2,$H2 # H^2.hi·Xi+2.lo+H^2.lo·Xi+2.hi
- vpmsumd $Xm3,$IN3,$H # H.hi·Xi+3.lo +H.lo·Xi+3.hi
- vpmsumd $Xl,$Xl,$xC2
-
- vxor $Xl3,$Xl3,$Xl1
- vxor $Xh3,$Xh3,$Xh1
- vxor $Xh,$Xh,$IN0
- vxor $Xm2,$Xm2,$Xm1
- vxor $Xh,$Xh,$t1
- vxor $Xm3,$Xm3,$Xm2
- vxor $Xh,$Xh,$Xl
- bge Loop_4x
-
-Ltail_4x:
- vpmsumd $Xl,$Xh,$H4l # H^4.lo·Xi.lo
- vpmsumd $Xm,$Xh,$H4 # H^4.hi·Xi.lo+H^4.lo·Xi.hi
- vpmsumd $Xh,$Xh,$H4h # H^4.hi·Xi.hi
-
- vxor $Xl,$Xl,$Xl3
- vxor $Xm,$Xm,$Xm3
-
- vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
-
- vsldoi $t0,$Xm,$zero,8
- vsldoi $t1,$zero,$Xm,8
- vxor $Xh,$Xh,$Xh3
- vxor $Xl,$Xl,$t0
- vxor $Xh,$Xh,$t1
-
- vsldoi $Xl,$Xl,$Xl,8
- vxor $Xl,$Xl,$t2
-
- vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
- vpmsumd $Xl,$Xl,$xC2
- vxor $t1,$t1,$Xh
- vxor $Xl,$Xl,$t1
-
- addic. $len,$len,4
- beq Ldone_4x
-
- lvx_u $IN0,0,$inp
- ${UCMP}i $len,2
- li $len,-4
- blt Lone
- lvx_u $IN1,r8,$inp
- beq Ltwo
-
-Lthree:
- lvx_u $IN2,r9,$inp
- le?vperm $IN0,$IN0,$IN0,$lemask
- le?vperm $IN1,$IN1,$IN1,$lemask
- le?vperm $IN2,$IN2,$IN2,$lemask
-
- vxor $Xh,$IN0,$Xl
- vmr $H4l,$H3l
- vmr $H4, $H3
- vmr $H4h,$H3h
-
- vperm $t0,$IN1,$IN2,$loperm
- vperm $t1,$IN1,$IN2,$hiperm
- vpmsumd $Xm2,$IN1,$H2 # H^2.lo·Xi+1.hi+H^2.hi·Xi+1.lo
- vpmsumd $Xm3,$IN2,$H # H.hi·Xi+2.lo +H.lo·Xi+2.hi
- vpmsumd $Xl3,$t0,$H21l # H^2.lo·Xi+1.lo+H.lo·Xi+2.lo
- vpmsumd $Xh3,$t1,$H21h # H^2.hi·Xi+1.hi+H.hi·Xi+2.hi
-
- vxor $Xm3,$Xm3,$Xm2
- b Ltail_4x
-
-.align 4
-Ltwo:
- le?vperm $IN0,$IN0,$IN0,$lemask
- le?vperm $IN1,$IN1,$IN1,$lemask
-
- vxor $Xh,$IN0,$Xl
- vperm $t0,$zero,$IN1,$loperm
- vperm $t1,$zero,$IN1,$hiperm
-
- vsldoi $H4l,$zero,$H2,8
- vmr $H4, $H2
- vsldoi $H4h,$H2,$zero,8
-
- vpmsumd $Xl3,$t0, $H21l # H.lo·Xi+1.lo
- vpmsumd $Xm3,$IN1,$H # H.hi·Xi+1.lo+H.lo·Xi+2.hi
- vpmsumd $Xh3,$t1, $H21h # H.hi·Xi+1.hi
-
- b Ltail_4x
-
-.align 4
-Lone:
- le?vperm $IN0,$IN0,$IN0,$lemask
-
- vsldoi $H4l,$zero,$H,8
- vmr $H4, $H
- vsldoi $H4h,$H,$zero,8
-
- vxor $Xh,$IN0,$Xl
- vxor $Xl3,$Xl3,$Xl3
- vxor $Xm3,$Xm3,$Xm3
- vxor $Xh3,$Xh3,$Xh3
-
- b Ltail_4x
-
-Ldone_4x:
- le?vperm $Xl,$Xl,$Xl,$lemask
- stvx_u $Xl,0,$Xip # write out Xi
-
- li r10,`15+6*$SIZE_T`
- li r11,`31+6*$SIZE_T`
- mtspr 256,$vrsave
- lvx v20,r10,$sp
- addi r10,r10,32
- lvx v21,r11,$sp
- addi r11,r11,32
- lvx v22,r10,$sp
- addi r10,r10,32
- lvx v23,r11,$sp
- addi r11,r11,32
- lvx v24,r10,$sp
- addi r10,r10,32
- lvx v25,r11,$sp
- addi r11,r11,32
- lvx v26,r10,$sp
- addi r10,r10,32
- lvx v27,r11,$sp
- addi r11,r11,32
- lvx v28,r10,$sp
- addi r10,r10,32
- lvx v29,r11,$sp
- addi r11,r11,32
- lvx v30,r10,$sp
- lvx v31,r11,$sp
- addi $sp,$sp,$FRAME
- blr
- .long 0
- .byte 0,12,0x04,0,0x80,0,4,0
- .long 0
-___
-}
-$code.=<<___;
-.size .gcm_ghash_p8,.-.gcm_ghash_p8
-
-.asciz "GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
-.align 2
-___
-
-foreach (split("\n",$code)) {
- s/\`([^\`]*)\`/eval $1/geo;
-
- if ($flavour =~ /le$/o) { # little-endian
- s/le\?//o or
- s/be\?/#be#/o;
- } else {
- s/le\?/#le#/o or
- s/be\?//o;
- }
- print $_,"\n";
-}
-
-close STDOUT or die "error closing STDOUT: $!"; # enforce flush
diff --git a/crypto/fipsmodule/modes/gcm.c b/crypto/fipsmodule/modes/gcm.c
index 11a0b20..f22fa9d 100644
--- a/crypto/fipsmodule/modes/gcm.c
+++ b/crypto/fipsmodule/modes/gcm.c
@@ -230,13 +230,6 @@
*out_hash = gcm_ghash_neon;
return;
}
-#elif defined(GHASH_ASM_PPC64LE)
- if (CRYPTO_is_PPC64LE_vcrypto_capable()) {
- gcm_init_p8(out_table, H);
- *out_mult = gcm_gmult_p8;
- *out_hash = gcm_ghash_p8;
- return;
- }
#endif
gcm_init_nohw(out_table, H);
diff --git a/crypto/fipsmodule/modes/gcm_test.cc b/crypto/fipsmodule/modes/gcm_test.cc
index 324d0e8..e7fcd5c 100644
--- a/crypto/fipsmodule/modes/gcm_test.cc
+++ b/crypto/fipsmodule/modes/gcm_test.cc
@@ -222,15 +222,5 @@
}
}
#endif
-
-#if defined(GHASH_ASM_PPC64LE)
- if (CRYPTO_is_PPC64LE_vcrypto_capable()) {
- CHECK_ABI(gcm_init_p8, Htable, kH);
- CHECK_ABI(gcm_gmult_p8, X, Htable);
- for (size_t blocks : kBlockCounts) {
- CHECK_ABI(gcm_ghash_p8, X, Htable, buf, 16 * blocks);
- }
- }
-#endif // GHASH_ASM_PPC64LE
}
#endif // SUPPORTS_ABI_TEST && !OPENSSL_NO_ASM
diff --git a/crypto/fipsmodule/modes/internal.h b/crypto/fipsmodule/modes/internal.h
index 8a0a75f..d77efca 100644
--- a/crypto/fipsmodule/modes/internal.h
+++ b/crypto/fipsmodule/modes/internal.h
@@ -308,13 +308,6 @@
void *Xi, uint8_t *ivec, const AES_KEY *key);
#endif
-#elif defined(OPENSSL_PPC64LE)
-#define GHASH_ASM_PPC64LE
-#define GCM_FUNCREF
-void gcm_init_p8(u128 Htable[16], const uint64_t Xi[2]);
-void gcm_gmult_p8(uint64_t Xi[2], const u128 Htable[16]);
-void gcm_ghash_p8(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
- size_t len);
#endif
#endif // OPENSSL_NO_ASM
diff --git a/crypto/fipsmodule/rand/rand.c b/crypto/fipsmodule/rand/rand.c
index cb1ee7d..41754c6 100644
--- a/crypto/fipsmodule/rand/rand.c
+++ b/crypto/fipsmodule/rand/rand.c
@@ -416,11 +416,6 @@
// Take a read lock around accesses to |state->drbg|. This is needed to
// avoid returning bad entropy if we race with
// |rand_thread_state_clear_all|.
- //
- // This lock must be taken after any calls to |CRYPTO_sysrand| to avoid a
- // bug on ppc64le. glibc may implement pthread locks by wrapping user code
- // in a hardware transaction, but, on some older versions of glibc and the
- // kernel, syscalls made with |syscall| did not abort the transaction.
CRYPTO_STATIC_MUTEX_lock_read(state_clear_all_lock_bss_get());
#endif
if (!CTR_DRBG_reseed(&state->drbg, seed, reseed_additional_data,
diff --git a/crypto/fipsmodule/sha/internal.h b/crypto/fipsmodule/sha/internal.h
index cc90914..605f166 100644
--- a/crypto/fipsmodule/sha/internal.h
+++ b/crypto/fipsmodule/sha/internal.h
@@ -22,23 +22,14 @@
#endif
-#if defined(OPENSSL_PPC64LE) || \
- (!defined(OPENSSL_NO_ASM) && \
- (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \
- defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)))
-// POWER has an intrinsics-based implementation of SHA-1 and thus the functions
-// normally defined in assembly are available even with |OPENSSL_NO_ASM| in
-// this case.
-#define SHA1_ASM
-void sha1_block_data_order(uint32_t *state, const uint8_t *in,
- size_t num_blocks);
-#endif
-
#if !defined(OPENSSL_NO_ASM) && \
(defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \
defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64))
+#define SHA1_ASM
#define SHA256_ASM
#define SHA512_ASM
+void sha1_block_data_order(uint32_t *state, const uint8_t *in,
+ size_t num_blocks);
void sha256_block_data_order(uint32_t *state, const uint8_t *in,
size_t num_blocks);
void sha512_block_data_order(uint64_t *state, const uint8_t *in,
diff --git a/crypto/fipsmodule/sha/sha1-altivec.c b/crypto/fipsmodule/sha/sha1-altivec.c
deleted file mode 100644
index 3152827..0000000
--- a/crypto/fipsmodule/sha/sha1-altivec.c
+++ /dev/null
@@ -1,361 +0,0 @@
-/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
- * All rights reserved.
- *
- * This package is an SSL implementation written
- * by Eric Young (eay@cryptsoft.com).
- * The implementation was written so as to conform with Netscapes SSL.
- *
- * This library is free for commercial and non-commercial use as long as
- * the following conditions are aheared to. The following conditions
- * apply to all code found in this distribution, be it the RC4, RSA,
- * lhash, DES, etc., code; not just the SSL code. The SSL documentation
- * included with this distribution is covered by the same copyright terms
- * except that the holder is Tim Hudson (tjh@cryptsoft.com).
- *
- * Copyright remains Eric Young's, and as such any Copyright notices in
- * the code are not to be removed.
- * If this package is used in a product, Eric Young should be given attribution
- * as the author of the parts of the library used.
- * This can be in the form of a textual message at program startup or
- * in documentation (online or textual) provided with the package.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * "This product includes cryptographic software written by
- * Eric Young (eay@cryptsoft.com)"
- * The word 'cryptographic' can be left out if the rouines from the library
- * being used are not cryptographic related :-).
- * 4. If you include any Windows specific code (or a derivative thereof) from
- * the apps directory (application code) you must include an acknowledgement:
- * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
- *
- * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * The licence and distribution terms for any publically available version or
- * derivative of this code cannot be changed. i.e. this code cannot simply be
- * copied and put under another distribution licence
- * [including the GNU Public Licence.] */
-
-// Altivec-optimized SHA1 in C. This is tested on ppc64le only.
-//
-// References:
-// https://software.intel.com/en-us/articles/improving-the-performance-of-the-secure-hash-algorithm-1
-// http://arctic.org/~dean/crypto/sha1.html
-//
-// This code used the generic SHA-1 from OpenSSL as a basis and AltiVec
-// optimisations were added on top.
-
-#include <openssl/sha.h>
-
-#if defined(OPENSSL_PPC64LE)
-
-#include <altivec.h>
-
-void sha1_block_data_order(uint32_t *state, const uint8_t *data, size_t num);
-
-static uint32_t rotate(uint32_t a, int n) { return (a << n) | (a >> (32 - n)); }
-
-typedef vector unsigned int vec_uint32_t;
-typedef vector unsigned char vec_uint8_t;
-
-// Vector constants
-static const vec_uint8_t k_swap_endianness = {3, 2, 1, 0, 7, 6, 5, 4,
- 11, 10, 9, 8, 15, 14, 13, 12};
-
-// Shift amounts for byte and bit shifts and rotations
-static const vec_uint8_t k_4_bytes = {32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32};
-static const vec_uint8_t k_12_bytes = {96, 96, 96, 96, 96, 96, 96, 96,
- 96, 96, 96, 96, 96, 96, 96, 96};
-
-#define K_00_19 0x5a827999UL
-#define K_20_39 0x6ed9eba1UL
-#define K_40_59 0x8f1bbcdcUL
-#define K_60_79 0xca62c1d6UL
-
-// Vector versions of the above.
-static const vec_uint32_t K_00_19_x_4 = {K_00_19, K_00_19, K_00_19, K_00_19};
-static const vec_uint32_t K_20_39_x_4 = {K_20_39, K_20_39, K_20_39, K_20_39};
-static const vec_uint32_t K_40_59_x_4 = {K_40_59, K_40_59, K_40_59, K_40_59};
-static const vec_uint32_t K_60_79_x_4 = {K_60_79, K_60_79, K_60_79, K_60_79};
-
-// vector message scheduling: compute message schedule for round i..i+3 where i
-// is divisible by 4. We return the schedule w[i..i+3] as a vector. In
-// addition, we also precompute sum w[i..+3] and an additive constant K. This
-// is done to offload some computation of f() in the integer execution units.
-//
-// Byte shifting code below may not be correct for big-endian systems.
-static vec_uint32_t sched_00_15(vec_uint32_t *pre_added, const void *data,
- vec_uint32_t k) {
- const vector unsigned char unaligned_data =
- vec_vsx_ld(0, (const unsigned char*) data);
- const vec_uint32_t v = (vec_uint32_t) unaligned_data;
- const vec_uint32_t w = vec_perm(v, v, k_swap_endianness);
- vec_st(w + k, 0, pre_added);
- return w;
-}
-
-// Compute w[i..i+3] using these steps for i in [16, 20, 24, 28]
-//
-// w'[i ] = (w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]) <<< 1
-// w'[i+1] = (w[i-2] ^ w[i-7] ^ w[i-13] ^ w[i-15]) <<< 1
-// w'[i+2] = (w[i-1] ^ w[i-6] ^ w[i-12] ^ w[i-14]) <<< 1
-// w'[i+3] = ( 0 ^ w[i-5] ^ w[i-11] ^ w[i-13]) <<< 1
-//
-// w[ i] = w'[ i]
-// w[i+1] = w'[i+1]
-// w[i+2] = w'[i+2]
-// w[i+3] = w'[i+3] ^ (w'[i] <<< 1)
-static vec_uint32_t sched_16_31(vec_uint32_t *pre_added, vec_uint32_t minus_4,
- vec_uint32_t minus_8, vec_uint32_t minus_12,
- vec_uint32_t minus_16, vec_uint32_t k) {
- const vec_uint32_t minus_3 = vec_sro(minus_4, k_4_bytes);
- const vec_uint32_t minus_14 = vec_sld((minus_12), (minus_16), 8);
- const vec_uint32_t k_1_bit = vec_splat_u32(1);
- const vec_uint32_t w_prime =
- vec_rl(minus_3 ^ minus_8 ^ minus_14 ^ minus_16, k_1_bit);
- const vec_uint32_t w =
- w_prime ^ vec_rl(vec_slo(w_prime, k_12_bytes), k_1_bit);
- vec_st(w + k, 0, pre_added);
- return w;
-}
-
-// Compute w[i..i+3] using this relation for i in [32, 36, 40 ... 76]
-// w[i] = (w[i-6] ^ w[i-16] ^ w[i-28] ^ w[i-32]), 2) <<< 2
-static vec_uint32_t sched_32_79(vec_uint32_t *pre_added, vec_uint32_t minus_4,
- vec_uint32_t minus_8, vec_uint32_t minus_16,
- vec_uint32_t minus_28, vec_uint32_t minus_32,
- vec_uint32_t k) {
- const vec_uint32_t minus_6 = vec_sld(minus_4, minus_8, 8);
- const vec_uint32_t k_2_bits = vec_splat_u32(2);
- const vec_uint32_t w =
- vec_rl(minus_6 ^ minus_16 ^ minus_28 ^ minus_32, k_2_bits);
- vec_st(w + k, 0, pre_added);
- return w;
-}
-
-// As pointed out by Wei Dai <weidai@eskimo.com>, F() below can be simplified
-// to the code in F_00_19. Wei attributes these optimisations to Peter
-// Gutmann's SHS code, and he attributes it to Rich Schroeppel. #define
-// F(x,y,z) (((x) & (y)) | ((~(x)) & (z))) I've just become aware of another
-// tweak to be made, again from Wei Dai, in F_40_59, (x&a)|(y&a) -> (x|y)&a
-#define F_00_19(b, c, d) ((((c) ^ (d)) & (b)) ^ (d))
-#define F_20_39(b, c, d) ((b) ^ (c) ^ (d))
-#define F_40_59(b, c, d) (((b) & (c)) | (((b) | (c)) & (d)))
-#define F_60_79(b, c, d) F_20_39(b, c, d)
-
-// We pre-added the K constants during message scheduling.
-#define BODY_00_19(i, a, b, c, d, e, f) \
- do { \
- (f) = w[i] + (e) + rotate((a), 5) + F_00_19((b), (c), (d)); \
- (b) = rotate((b), 30); \
- } while (0)
-
-#define BODY_20_39(i, a, b, c, d, e, f) \
- do { \
- (f) = w[i] + (e) + rotate((a), 5) + F_20_39((b), (c), (d)); \
- (b) = rotate((b), 30); \
- } while (0)
-
-#define BODY_40_59(i, a, b, c, d, e, f) \
- do { \
- (f) = w[i] + (e) + rotate((a), 5) + F_40_59((b), (c), (d)); \
- (b) = rotate((b), 30); \
- } while (0)
-
-#define BODY_60_79(i, a, b, c, d, e, f) \
- do { \
- (f) = w[i] + (e) + rotate((a), 5) + F_60_79((b), (c), (d)); \
- (b) = rotate((b), 30); \
- } while (0)
-
-void sha1_block_data_order(uint32_t *state, const uint8_t *data, size_t num) {
- uint32_t A, B, C, D, E, T;
-
- A = state[0];
- B = state[1];
- C = state[2];
- D = state[3];
- E = state[4];
-
- for (;;) {
- vec_uint32_t vw[20];
- const uint32_t *w = (const uint32_t *)&vw;
-
- vec_uint32_t k = K_00_19_x_4;
- const vec_uint32_t w0 = sched_00_15(vw + 0, data + 0, k);
- BODY_00_19(0, A, B, C, D, E, T);
- BODY_00_19(1, T, A, B, C, D, E);
- BODY_00_19(2, E, T, A, B, C, D);
- BODY_00_19(3, D, E, T, A, B, C);
-
- const vec_uint32_t w4 = sched_00_15(vw + 1, data + 16, k);
- BODY_00_19(4, C, D, E, T, A, B);
- BODY_00_19(5, B, C, D, E, T, A);
- BODY_00_19(6, A, B, C, D, E, T);
- BODY_00_19(7, T, A, B, C, D, E);
-
- const vec_uint32_t w8 = sched_00_15(vw + 2, data + 32, k);
- BODY_00_19(8, E, T, A, B, C, D);
- BODY_00_19(9, D, E, T, A, B, C);
- BODY_00_19(10, C, D, E, T, A, B);
- BODY_00_19(11, B, C, D, E, T, A);
-
- const vec_uint32_t w12 = sched_00_15(vw + 3, data + 48, k);
- BODY_00_19(12, A, B, C, D, E, T);
- BODY_00_19(13, T, A, B, C, D, E);
- BODY_00_19(14, E, T, A, B, C, D);
- BODY_00_19(15, D, E, T, A, B, C);
-
- const vec_uint32_t w16 = sched_16_31(vw + 4, w12, w8, w4, w0, k);
- BODY_00_19(16, C, D, E, T, A, B);
- BODY_00_19(17, B, C, D, E, T, A);
- BODY_00_19(18, A, B, C, D, E, T);
- BODY_00_19(19, T, A, B, C, D, E);
-
- k = K_20_39_x_4;
- const vec_uint32_t w20 = sched_16_31(vw + 5, w16, w12, w8, w4, k);
- BODY_20_39(20, E, T, A, B, C, D);
- BODY_20_39(21, D, E, T, A, B, C);
- BODY_20_39(22, C, D, E, T, A, B);
- BODY_20_39(23, B, C, D, E, T, A);
-
- const vec_uint32_t w24 = sched_16_31(vw + 6, w20, w16, w12, w8, k);
- BODY_20_39(24, A, B, C, D, E, T);
- BODY_20_39(25, T, A, B, C, D, E);
- BODY_20_39(26, E, T, A, B, C, D);
- BODY_20_39(27, D, E, T, A, B, C);
-
- const vec_uint32_t w28 = sched_16_31(vw + 7, w24, w20, w16, w12, k);
- BODY_20_39(28, C, D, E, T, A, B);
- BODY_20_39(29, B, C, D, E, T, A);
- BODY_20_39(30, A, B, C, D, E, T);
- BODY_20_39(31, T, A, B, C, D, E);
-
- const vec_uint32_t w32 = sched_32_79(vw + 8, w28, w24, w16, w4, w0, k);
- BODY_20_39(32, E, T, A, B, C, D);
- BODY_20_39(33, D, E, T, A, B, C);
- BODY_20_39(34, C, D, E, T, A, B);
- BODY_20_39(35, B, C, D, E, T, A);
-
- const vec_uint32_t w36 = sched_32_79(vw + 9, w32, w28, w20, w8, w4, k);
- BODY_20_39(36, A, B, C, D, E, T);
- BODY_20_39(37, T, A, B, C, D, E);
- BODY_20_39(38, E, T, A, B, C, D);
- BODY_20_39(39, D, E, T, A, B, C);
-
- k = K_40_59_x_4;
- const vec_uint32_t w40 = sched_32_79(vw + 10, w36, w32, w24, w12, w8, k);
- BODY_40_59(40, C, D, E, T, A, B);
- BODY_40_59(41, B, C, D, E, T, A);
- BODY_40_59(42, A, B, C, D, E, T);
- BODY_40_59(43, T, A, B, C, D, E);
-
- const vec_uint32_t w44 = sched_32_79(vw + 11, w40, w36, w28, w16, w12, k);
- BODY_40_59(44, E, T, A, B, C, D);
- BODY_40_59(45, D, E, T, A, B, C);
- BODY_40_59(46, C, D, E, T, A, B);
- BODY_40_59(47, B, C, D, E, T, A);
-
- const vec_uint32_t w48 = sched_32_79(vw + 12, w44, w40, w32, w20, w16, k);
- BODY_40_59(48, A, B, C, D, E, T);
- BODY_40_59(49, T, A, B, C, D, E);
- BODY_40_59(50, E, T, A, B, C, D);
- BODY_40_59(51, D, E, T, A, B, C);
-
- const vec_uint32_t w52 = sched_32_79(vw + 13, w48, w44, w36, w24, w20, k);
- BODY_40_59(52, C, D, E, T, A, B);
- BODY_40_59(53, B, C, D, E, T, A);
- BODY_40_59(54, A, B, C, D, E, T);
- BODY_40_59(55, T, A, B, C, D, E);
-
- const vec_uint32_t w56 = sched_32_79(vw + 14, w52, w48, w40, w28, w24, k);
- BODY_40_59(56, E, T, A, B, C, D);
- BODY_40_59(57, D, E, T, A, B, C);
- BODY_40_59(58, C, D, E, T, A, B);
- BODY_40_59(59, B, C, D, E, T, A);
-
- k = K_60_79_x_4;
- const vec_uint32_t w60 = sched_32_79(vw + 15, w56, w52, w44, w32, w28, k);
- BODY_60_79(60, A, B, C, D, E, T);
- BODY_60_79(61, T, A, B, C, D, E);
- BODY_60_79(62, E, T, A, B, C, D);
- BODY_60_79(63, D, E, T, A, B, C);
-
- const vec_uint32_t w64 = sched_32_79(vw + 16, w60, w56, w48, w36, w32, k);
- BODY_60_79(64, C, D, E, T, A, B);
- BODY_60_79(65, B, C, D, E, T, A);
- BODY_60_79(66, A, B, C, D, E, T);
- BODY_60_79(67, T, A, B, C, D, E);
-
- const vec_uint32_t w68 = sched_32_79(vw + 17, w64, w60, w52, w40, w36, k);
- BODY_60_79(68, E, T, A, B, C, D);
- BODY_60_79(69, D, E, T, A, B, C);
- BODY_60_79(70, C, D, E, T, A, B);
- BODY_60_79(71, B, C, D, E, T, A);
-
- const vec_uint32_t w72 = sched_32_79(vw + 18, w68, w64, w56, w44, w40, k);
- BODY_60_79(72, A, B, C, D, E, T);
- BODY_60_79(73, T, A, B, C, D, E);
- BODY_60_79(74, E, T, A, B, C, D);
- BODY_60_79(75, D, E, T, A, B, C);
-
- // We don't use the last value
- (void)sched_32_79(vw + 19, w72, w68, w60, w48, w44, k);
- BODY_60_79(76, C, D, E, T, A, B);
- BODY_60_79(77, B, C, D, E, T, A);
- BODY_60_79(78, A, B, C, D, E, T);
- BODY_60_79(79, T, A, B, C, D, E);
-
- const uint32_t mask = 0xffffffffUL;
- state[0] = (state[0] + E) & mask;
- state[1] = (state[1] + T) & mask;
- state[2] = (state[2] + A) & mask;
- state[3] = (state[3] + B) & mask;
- state[4] = (state[4] + C) & mask;
-
- data += 64;
- if (--num == 0) {
- break;
- }
-
- A = state[0];
- B = state[1];
- C = state[2];
- D = state[3];
- E = state[4];
- }
-}
-
-#endif // OPENSSL_PPC64LE
-
-#undef K_00_19
-#undef K_20_39
-#undef K_40_59
-#undef K_60_79
-#undef F_00_19
-#undef F_20_39
-#undef F_40_59
-#undef F_60_79
-#undef BODY_00_19
-#undef BODY_20_39
-#undef BODY_40_59
-#undef BODY_60_79
diff --git a/crypto/internal.h b/crypto/internal.h
index 63e6a66..f9a243e 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -166,7 +166,7 @@
#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || defined(OPENSSL_ARM) || \
- defined(OPENSSL_AARCH64) || defined(OPENSSL_PPC64LE)
+ defined(OPENSSL_AARCH64)
// OPENSSL_cpuid_setup initializes the platform-specific feature cache.
void OPENSSL_cpuid_setup(void);
#endif
@@ -1251,16 +1251,6 @@
#endif // OPENSSL_ARM || OPENSSL_AARCH64
-#if defined(OPENSSL_PPC64LE)
-
-// CRYPTO_is_PPC64LE_vcrypto_capable returns true iff the current CPU supports
-// the Vector.AES category of instructions.
-int CRYPTO_is_PPC64LE_vcrypto_capable(void);
-
-extern unsigned long OPENSSL_ppc64le_hwcap2;
-
-#endif // OPENSSL_PPC64LE
-
#if defined(BORINGSSL_DISPATCH_TEST)
// Runtime CPU dispatch testing support
diff --git a/crypto/perlasm/ppc-xlate.pl b/crypto/perlasm/ppc-xlate.pl
deleted file mode 100644
index 1c51577..0000000
--- a/crypto/perlasm/ppc-xlate.pl
+++ /dev/null
@@ -1,320 +0,0 @@
-#! /usr/bin/env perl
-# Copyright 2006-2016 The OpenSSL Project Authors. All Rights Reserved.
-#
-# Licensed under the OpenSSL license (the "License"). You may not use
-# this file except in compliance with the License. You can obtain a copy
-# in the file LICENSE in the source distribution or at
-# https://www.openssl.org/source/license.html
-
-my $flavour = shift;
-my $output = shift;
-open STDOUT,">$output" || die "can't open $output: $!";
-
-my %GLOBALS;
-my %TYPES;
-my $dotinlocallabels=($flavour=~/linux/)?1:0;
-
-################################################################
-# directives which need special treatment on different platforms
-################################################################
-my $type = sub {
- my ($dir,$name,$type) = @_;
-
- $TYPES{$name} = $type;
- if ($flavour =~ /linux/) {
- $name =~ s|^\.||;
- ".type $name,$type";
- } else {
- "";
- }
-};
-my $globl = sub {
- my $junk = shift;
- my $name = shift;
- my $global = \$GLOBALS{$name};
- my $type = \$TYPES{$name};
- my $ret;
-
- $name =~ s|^\.||;
-
- SWITCH: for ($flavour) {
- /aix/ && do { if (!$$type) {
- $$type = "\@function";
- }
- if ($$type =~ /function/) {
- $name = ".$name";
- }
- last;
- };
- /osx/ && do { $name = "_$name";
- last;
- };
- /linux.*(32|64le)/
- && do { $ret .= ".globl $name";
- if (!$$type) {
- $ret .= "\n.type $name,\@function";
- $$type = "\@function";
- }
- last;
- };
- /linux.*64/ && do { $ret .= ".globl $name";
- if (!$$type) {
- $ret .= "\n.type $name,\@function";
- $$type = "\@function";
- }
- if ($$type =~ /function/) {
- $ret .= "\n.section \".opd\",\"aw\"";
- $ret .= "\n.align 3";
- $ret .= "\n$name:";
- $ret .= "\n.quad .$name,.TOC.\@tocbase,0";
- $ret .= "\n.previous";
- $name = ".$name";
- }
- last;
- };
- }
-
- $ret = ".globl $name" if (!$ret);
- $$global = $name;
- $ret;
-};
-my $text = sub {
- my $ret = ($flavour =~ /aix/) ? ".csect\t.text[PR],7" : ".text";
- $ret = ".abiversion 2\n".$ret if ($flavour =~ /linux.*64le/);
- $ret;
-};
-my $machine = sub {
- my $junk = shift;
- my $arch = shift;
- if ($flavour =~ /osx/)
- { $arch =~ s/\"//g;
- $arch = ($flavour=~/64/) ? "ppc970-64" : "ppc970" if ($arch eq "any");
- }
- ".machine $arch";
-};
-my $size = sub {
- if ($flavour =~ /linux/)
- { shift;
- my $name = shift;
- my $real = $GLOBALS{$name} ? \$GLOBALS{$name} : \$name;
- my $ret = ".size $$real,.-$$real";
- $name =~ s|^\.||;
- if ($$real ne $name) {
- $ret .= "\n.size $name,.-$$real";
- }
- $ret;
- }
- else
- { ""; }
-};
-my $asciz = sub {
- shift;
- my $line = join(",",@_);
- if ($line =~ /^"(.*)"$/)
- { ".byte " . join(",",unpack("C*",$1),0) . "\n.align 2"; }
- else
- { ""; }
-};
-my $quad = sub {
- shift;
- my @ret;
- my ($hi,$lo);
- for (@_) {
- if (/^0x([0-9a-f]*?)([0-9a-f]{1,8})$/io)
- { $hi=$1?"0x$1":"0"; $lo="0x$2"; }
- elsif (/^([0-9]+)$/o)
- { $hi=$1>>32; $lo=$1&0xffffffff; } # error-prone with 32-bit perl
- else
- { $hi=undef; $lo=$_; }
-
- if (defined($hi))
- { push(@ret,$flavour=~/le$/o?".long\t$lo,$hi":".long\t$hi,$lo"); }
- else
- { push(@ret,".quad $lo"); }
- }
- join("\n",@ret);
-};
-
-################################################################
-# simplified mnemonics not handled by at least one assembler
-################################################################
-my $cmplw = sub {
- my $f = shift;
- my $cr = 0; $cr = shift if ($#_>1);
- # Some out-of-date 32-bit GNU assembler just can't handle cmplw...
- ($flavour =~ /linux.*32/) ?
- " .long ".sprintf "0x%x",31<<26|$cr<<23|$_[0]<<16|$_[1]<<11|64 :
- " cmplw ".join(',',$cr,@_);
-};
-my $bdnz = sub {
- my $f = shift;
- my $bo = $f=~/[\+\-]/ ? 16+9 : 16; # optional "to be taken" hint
- " bc $bo,0,".shift;
-} if ($flavour!~/linux/);
-my $bltlr = sub {
- my $f = shift;
- my $bo = $f=~/\-/ ? 12+2 : 12; # optional "not to be taken" hint
- ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
- " .long ".sprintf "0x%x",19<<26|$bo<<21|16<<1 :
- " bclr $bo,0";
-};
-my $bnelr = sub {
- my $f = shift;
- my $bo = $f=~/\-/ ? 4+2 : 4; # optional "not to be taken" hint
- ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
- " .long ".sprintf "0x%x",19<<26|$bo<<21|2<<16|16<<1 :
- " bclr $bo,2";
-};
-my $beqlr = sub {
- my $f = shift;
- my $bo = $f=~/-/ ? 12+2 : 12; # optional "not to be taken" hint
- ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
- " .long ".sprintf "0x%X",19<<26|$bo<<21|2<<16|16<<1 :
- " bclr $bo,2";
-};
-# GNU assembler can't handle extrdi rA,rS,16,48, or when sum of last two
-# arguments is 64, with "operand out of range" error.
-my $extrdi = sub {
- my ($f,$ra,$rs,$n,$b) = @_;
- $b = ($b+$n)&63; $n = 64-$n;
- " rldicl $ra,$rs,$b,$n";
-};
-my $vmr = sub {
- my ($f,$vx,$vy) = @_;
- " vor $vx,$vy,$vy";
-};
-
-# Some ABIs specify vrsave, special-purpose register #256, as reserved
-# for system use.
-my $no_vrsave = ($flavour =~ /aix|linux64le/);
-my $mtspr = sub {
- my ($f,$idx,$ra) = @_;
- if ($idx == 256 && $no_vrsave) {
- " or $ra,$ra,$ra";
- } else {
- " mtspr $idx,$ra";
- }
-};
-my $mfspr = sub {
- my ($f,$rd,$idx) = @_;
- if ($idx == 256 && $no_vrsave) {
- " li $rd,-1";
- } else {
- " mfspr $rd,$idx";
- }
-};
-
-# PowerISA 2.06 stuff
-sub vsxmem_op {
- my ($f, $vrt, $ra, $rb, $op) = @_;
- " .long ".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|($rb<<11)|($op*2+1);
-}
-# made-up unaligned memory reference AltiVec/VMX instructions
-my $lvx_u = sub { vsxmem_op(@_, 844); }; # lxvd2x
-my $stvx_u = sub { vsxmem_op(@_, 972); }; # stxvd2x
-my $lvdx_u = sub { vsxmem_op(@_, 588); }; # lxsdx
-my $stvdx_u = sub { vsxmem_op(@_, 716); }; # stxsdx
-my $lvx_4w = sub { vsxmem_op(@_, 780); }; # lxvw4x
-my $stvx_4w = sub { vsxmem_op(@_, 908); }; # stxvw4x
-
-# PowerISA 2.07 stuff
-sub vcrypto_op {
- my ($f, $vrt, $vra, $vrb, $op) = @_;
- " .long ".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|$op;
-}
-my $vcipher = sub { vcrypto_op(@_, 1288); };
-my $vcipherlast = sub { vcrypto_op(@_, 1289); };
-my $vncipher = sub { vcrypto_op(@_, 1352); };
-my $vncipherlast= sub { vcrypto_op(@_, 1353); };
-my $vsbox = sub { vcrypto_op(@_, 0, 1480); };
-my $vshasigmad = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1730); };
-my $vshasigmaw = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1666); };
-my $vpmsumb = sub { vcrypto_op(@_, 1032); };
-my $vpmsumd = sub { vcrypto_op(@_, 1224); };
-my $vpmsubh = sub { vcrypto_op(@_, 1096); };
-my $vpmsumw = sub { vcrypto_op(@_, 1160); };
-my $vaddudm = sub { vcrypto_op(@_, 192); };
-
-my $mtsle = sub {
- my ($f, $arg) = @_;
- " .long ".sprintf "0x%X",(31<<26)|($arg<<21)|(147*2);
-};
-
-# PowerISA 3.0 stuff
-my $maddhdu = sub {
- my ($f, $rt, $ra, $rb, $rc) = @_;
- " .long ".sprintf "0x%X",(4<<26)|($rt<<21)|($ra<<16)|($rb<<11)|($rc<<6)|49;
-};
-my $maddld = sub {
- my ($f, $rt, $ra, $rb, $rc) = @_;
- " .long ".sprintf "0x%X",(4<<26)|($rt<<21)|($ra<<16)|($rb<<11)|($rc<<6)|51;
-};
-
-my $darn = sub {
- my ($f, $rt, $l) = @_;
- " .long ".sprintf "0x%X",(31<<26)|($rt<<21)|($l<<16)|(755<<1);
-};
-
-print <<___;
-// This file is generated from a similarly-named Perl script in the BoringSSL
-// source tree. Do not edit by hand.
-
-#if defined(__has_feature)
-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
-#define OPENSSL_NO_ASM
-#endif
-#endif
-
-#if !defined(OPENSSL_NO_ASM) && defined(__powerpc64__) && defined(__ELF__)
-___
-
-while($line=<>) {
-
- $line =~ s|[#!;].*$||; # get rid of asm-style comments...
- $line =~ s|/\*.*\*/||; # ... and C-style comments...
- $line =~ s|^\s+||; # ... and skip white spaces in beginning...
- $line =~ s|\s+$||; # ... and at the end
-
- {
- $line =~ s|\.L(\w+)|L$1|g; # common denominator for Locallabel
- $line =~ s|\bL(\w+)|\.L$1|g if ($dotinlocallabels);
- }
-
- {
- $line =~ s|(^[\.\w]+)\:\s*||;
- my $label = $1;
- if ($label) {
- my $xlated = ($GLOBALS{$label} or $label);
- print "$xlated:";
- if ($flavour =~ /linux.*64le/) {
- if ($TYPES{$label} =~ /function/) {
- printf "\n.localentry %s,0\n",$xlated;
- }
- }
- }
- }
-
- {
- $line =~ s|^\s*(\.?)(\w+)([\.\+\-]?)\s*||;
- my $c = $1; $c = "\t" if ($c eq "");
- my $mnemonic = $2;
- my $f = $3;
- my $opcode = eval("\$$mnemonic");
- $line =~ s/\b(c?[rf]|v|vs)([0-9]+)\b/$2/g if ($c ne "." and $flavour !~ /osx/);
- if (ref($opcode) eq 'CODE') { $line = &$opcode($f,split(',',$line)); }
- elsif ($mnemonic) { $line = $c.$mnemonic.$f."\t".$line; }
- }
-
- print $line if ($line);
- print "\n";
-}
-
-print <<___;
-#endif // !OPENSSL_NO_ASM && __powerpc64__ && __ELF__
-#if defined(__ELF__)
-// See https://www.airs.com/blog/archives/518.
-.section .note.GNU-stack,"",\%progbits
-#endif
-___
-
-close STDOUT or die "error closing STDOUT: $!";
diff --git a/crypto/test/abi_test.h b/crypto/test/abi_test.h
index 1ba82b1..24340c9 100644
--- a/crypto/test/abi_test.h
+++ b/crypto/test/abi_test.h
@@ -179,78 +179,7 @@
CALLER_STATE_REGISTER(uint64_t, x28) \
CALLER_STATE_REGISTER(uint64_t, x29)
-#elif defined(OPENSSL_PPC64LE)
-
-// CRReg only compares the CR2-CR4 bits of a CR register.
-struct CRReg {
- uint32_t masked() const { return value & 0x00fff000; }
- bool operator==(CRReg r) const { return masked() == r.masked(); }
- bool operator!=(CRReg r) const { return masked() != r.masked(); }
- uint32_t value;
-};
-
-// References:
-// ELFv2: http://openpowerfoundation.org/wp-content/uploads/resources/leabi/leabi-20170510.pdf
-//
-// Note vector and floating-point registers on POWER have two different names.
-// Originally, there were 32 floating-point registers and 32 vector registers,
-// labelled f0-f31 and v0-v31 respectively. Later, VSX (Vector Scalar Extension)
-// unified them into 64 registers vs0-vs63. f0-f31 map to the lower halves of
-// vs0-vs31. v0-v31 map to vs32-vs63. The ABI was defined in terms of pre-VSX
-// names, so we use those names here. In particular, f14-f31 are
-// callee-saved, but the upper halves of vs14-vs31 are not.
-#define LOOP_CALLER_STATE_REGISTERS() \
- CALLER_STATE_REGISTER(Reg128, v20) \
- CALLER_STATE_REGISTER(Reg128, v21) \
- CALLER_STATE_REGISTER(Reg128, v22) \
- CALLER_STATE_REGISTER(Reg128, v23) \
- CALLER_STATE_REGISTER(Reg128, v24) \
- CALLER_STATE_REGISTER(Reg128, v25) \
- CALLER_STATE_REGISTER(Reg128, v26) \
- CALLER_STATE_REGISTER(Reg128, v27) \
- CALLER_STATE_REGISTER(Reg128, v28) \
- CALLER_STATE_REGISTER(Reg128, v29) \
- CALLER_STATE_REGISTER(Reg128, v30) \
- CALLER_STATE_REGISTER(Reg128, v31) \
- CALLER_STATE_REGISTER(uint64_t, r14) \
- CALLER_STATE_REGISTER(uint64_t, r15) \
- CALLER_STATE_REGISTER(uint64_t, r16) \
- CALLER_STATE_REGISTER(uint64_t, r17) \
- CALLER_STATE_REGISTER(uint64_t, r18) \
- CALLER_STATE_REGISTER(uint64_t, r19) \
- CALLER_STATE_REGISTER(uint64_t, r20) \
- CALLER_STATE_REGISTER(uint64_t, r21) \
- CALLER_STATE_REGISTER(uint64_t, r22) \
- CALLER_STATE_REGISTER(uint64_t, r23) \
- CALLER_STATE_REGISTER(uint64_t, r24) \
- CALLER_STATE_REGISTER(uint64_t, r25) \
- CALLER_STATE_REGISTER(uint64_t, r26) \
- CALLER_STATE_REGISTER(uint64_t, r27) \
- CALLER_STATE_REGISTER(uint64_t, r28) \
- CALLER_STATE_REGISTER(uint64_t, r29) \
- CALLER_STATE_REGISTER(uint64_t, r30) \
- CALLER_STATE_REGISTER(uint64_t, r31) \
- CALLER_STATE_REGISTER(uint64_t, f14) \
- CALLER_STATE_REGISTER(uint64_t, f15) \
- CALLER_STATE_REGISTER(uint64_t, f16) \
- CALLER_STATE_REGISTER(uint64_t, f17) \
- CALLER_STATE_REGISTER(uint64_t, f18) \
- CALLER_STATE_REGISTER(uint64_t, f19) \
- CALLER_STATE_REGISTER(uint64_t, f20) \
- CALLER_STATE_REGISTER(uint64_t, f21) \
- CALLER_STATE_REGISTER(uint64_t, f22) \
- CALLER_STATE_REGISTER(uint64_t, f23) \
- CALLER_STATE_REGISTER(uint64_t, f24) \
- CALLER_STATE_REGISTER(uint64_t, f25) \
- CALLER_STATE_REGISTER(uint64_t, f26) \
- CALLER_STATE_REGISTER(uint64_t, f27) \
- CALLER_STATE_REGISTER(uint64_t, f28) \
- CALLER_STATE_REGISTER(uint64_t, f29) \
- CALLER_STATE_REGISTER(uint64_t, f30) \
- CALLER_STATE_REGISTER(uint64_t, f31) \
- CALLER_STATE_REGISTER(CRReg, cr)
-
-#endif // X86_64 || X86 || ARM || AARCH64 || PPC64LE
+#endif // X86_64 || X86 || ARM || AARCH64
// Enable ABI testing if all of the following are true.
//
@@ -302,12 +231,6 @@
// on 32-bit architectures for simplicity.
static_assert(sizeof(T) == 4, "parameter types must be word-sized");
return (crypto_word_t)t;
-#elif defined(OPENSSL_PPC64LE)
- // ELFv2, section 2.2.2.3 says the parameter save area sign- or zero-extends
- // parameters passed in memory. Section 2.2.3 is unclear on how to handle
- // register parameters, but section 2.2.2.3 additionally says that the memory
- // copy of a parameter is identical to the register one.
- return (crypto_word_t)t;
#elif defined(OPENSSL_X86_64) || defined(OPENSSL_AARCH64)
// AAPCS64, section 5.4.2, clauses C.7 and C.14 says any remaining bits in
// aarch are unspecified. iOS64 contradicts this and says the callee extends
@@ -362,9 +285,9 @@
template <typename R, typename... Args>
inline crypto_word_t CheckImpl(Result *out, bool unwind, R (*func)(Args...),
typename DeductionGuard<Args>::Type... args) {
- // We only support up to 8 arguments, so all arguments on aarch64 and ppc64le
- // are passed in registers. This is simpler and avoids the iOS discrepancy
- // around packing small arguments on the stack. (See the iOS64 reference.)
+ // We only support up to 8 arguments, so all arguments on aarch64 are passed
+ // in registers. This is simpler and avoids the iOS discrepancy around packing
+ // small arguments on the stack. (See the iOS64 reference.)
static_assert(sizeof...(args) <= 8,
"too many arguments for abi_test_trampoline");
diff --git a/crypto/test/asm/trampoline-ppc.pl b/crypto/test/asm/trampoline-ppc.pl
deleted file mode 100755
index b29c361..0000000
--- a/crypto/test/asm/trampoline-ppc.pl
+++ /dev/null
@@ -1,262 +0,0 @@
-#!/usr/bin/env perl
-# Copyright (c) 2019, Google Inc.
-#
-# Permission to use, copy, modify, and/or distribute this software for any
-# purpose with or without fee is hereby granted, provided that the above
-# copyright notice and this permission notice appear in all copies.
-#
-# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
-# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
-# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
-# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-
-# This file defines helper functions for crypto/test/abi_test.h on ppc64le. See
-# that header for details on how to use this.
-#
-# For convenience, this file is linked into libcrypto, where consuming builds
-# already support architecture-specific sources. The static linker should drop
-# this code in non-test binaries. This includes a shared library build of
-# libcrypto, provided --gc-sections or equivalent is used.
-#
-# References:
-#
-# ELFv2: http://openpowerfoundation.org/wp-content/uploads/resources/leabi/leabi-20170510.pdf
-
-use strict;
-
-my $flavour = shift;
-my $output = shift;
-if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
-
-$0 =~ m/(.*[\/\\])[^\/\\]+$/;
-my $dir = $1;
-my $xlate;
-( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
-( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
-die "can't locate ppc-xlate.pl";
-
-open OUT, "| \"$^X\" \"$xlate\" $flavour \"$output\"";
-*STDOUT = *OUT;
-
-unless ($flavour =~ /linux.*64le/) {
- die "This file only supports the ELFv2 ABI, used by ppc64le";
-}
-
-my $code = "";
-
-sub load_or_store_regs {
- # $op is "l" or "st".
- my ($op, $base_reg, $base_offset) = @_;
- # Vector registers.
- foreach (20..31) {
- my $offset = $base_offset + ($_ - 20) * 16;
- # Vector registers only support indexed register addressing.
- $code .= "\tli\tr11, $offset\n";
- $code .= "\t${op}vx\tv$_, r11, $base_reg\n";
- }
- # Save general registers.
- foreach (14..31) {
- my $offset = $base_offset + 192 + ($_ - 14) * 8;
- $code .= "\t${op}d\tr$_, $offset($base_reg)\n";
- }
- # Save floating point registers.
- foreach (14..31) {
- my $offset = $base_offset + 336 + ($_ - 14) * 8;
- $code .= "\t${op}fd\tf$_, $offset($base_reg)\n";
- }
-}
-
-sub load_regs {
- my ($base_reg, $base_offset) = @_;
- load_or_store_regs("l", $base_reg, $base_offset);
-}
-
-sub store_regs {
- my ($base_reg, $base_offset) = @_;
- load_or_store_regs("st", $base_reg, $base_offset);
-}
-
-my ($func, $state, $argv, $argc) = ("r3", "r4", "r5", "r6");
-$code .= <<____;
-.machine "any"
-.text
-
-# abi_test_trampoline loads callee-saved registers from |state|, calls |func|
-# with |argv|, then saves the callee-saved registers into |state|. It returns
-# the result of |func|. The |unwind| argument is unused.
-# uint64_t abi_test_trampoline(void (*func)(...), CallerState *state,
-# const uint64_t *argv, size_t argc,
-# uint64_t unwind);
-.globl abi_test_trampoline
-.align 5
-abi_test_trampoline:
- # LR is saved into the caller's stack frame.
- mflr r0
- std r0, 16(r1)
-
- # Allocate 66*8 = 528 bytes of stack frame. From the top of the stack
- # to the bottom, the stack frame is:
- #
- # 0(r1) - Back chain pointer
- # 8(r1) - CR save area
- # 16(r1) - LR save area (for |func|)
- # 24(r1) - TOC pointer save area
- # 32(r1) - Saved copy of |state|
- # 40(r1) - Padding
- # 48(r1) - Vector register save area (v20-v31, 12 registers)
- # 240(r1) - General register save area (r14-r31, 18 registers)
- # 384(r1) - Floating point register save area (f14-f31, 18 registers)
- #
- # Note the layouts of the register save areas and CallerState match.
- #
- # In the ELFv2 ABI, the parameter save area is optional if the function
- # is non-variadic and all parameters fit in registers. We only support
- # such functions, so we omit it to test that |func| does not rely on it.
- stdu r1, -528(r1)
-
- mfcr r0
- std r0, 8(r1) # Save CR
- std r2, 24(r1) # Save TOC
- std $state, 32(r1) # Save |state|
-____
-# Save registers to the stack.
-store_regs("r1", 48);
-# Load registers from the caller.
-load_regs($state, 0);
-$code .= <<____;
- # Load CR from |state|.
- ld r0, 480($state)
- mtcr r0
-
- # Move parameters into temporary registers so they are not clobbered.
- addi r11, $argv, -8 # Adjust for ldu below
- mr r12, $func
-
- # Load parameters into registers.
- cmpdi $argc, 0
- beq .Largs_done
- mtctr $argc
- ldu r3, 8(r11)
- bdz .Largs_done
- ldu r4, 8(r11)
- bdz .Largs_done
- ldu r5, 8(r11)
- bdz .Largs_done
- ldu r6, 8(r11)
- bdz .Largs_done
- ldu r7, 8(r11)
- bdz .Largs_done
- ldu r8, 8(r11)
- bdz .Largs_done
- ldu r9, 8(r11)
- bdz .Largs_done
- ldu r10, 8(r11)
-
-.Largs_done:
- li r2, 0 # Clear TOC to test |func|'s global entry point
- mtctr r12
- bctrl
- ld r2, 24(r1) # Restore TOC
-
- ld $state, 32(r1) # Reload |state|
-____
-# Output resulting registers to the caller.
-store_regs($state, 0);
-# Restore registers from the stack.
-load_regs("r1", 48);
-$code .= <<____;
- mfcr r0
- std r0, 480($state) # Output CR to caller
- ld r0, 8(r1)
- mtcrf 0b00111000, r0 # Restore CR2-CR4
- addi r1, r1, 528
- ld r0, 16(r1) # Restore LR
- mtlr r0
- blr
-.size abi_test_trampoline,.-abi_test_trampoline
-____
-
-# abi_test_clobber_* clobbers the corresponding register. These are used to test
-# the ABI-testing framework.
-foreach (0..31) {
- # r1 is the stack pointer. r13 is the thread pointer.
- next if ($_ == 1 || $_ == 13);
- $code .= <<____;
-.globl abi_test_clobber_r$_
-.align 5
-abi_test_clobber_r$_:
- li r$_, 0
- blr
-.size abi_test_clobber_r$_,.-abi_test_clobber_r$_
-____
-}
-
-foreach (0..31) {
- $code .= <<____;
-.globl abi_test_clobber_f$_
-.align 4
-abi_test_clobber_f$_:
- li r0, 0
- # Use the red zone.
- std r0, -8(r1)
- lfd f$_, -8(r1)
- blr
-.size abi_test_clobber_f$_,.-abi_test_clobber_f$_
-____
-}
-
-foreach (0..31) {
- $code .= <<____;
-.globl abi_test_clobber_v$_
-.align 4
-abi_test_clobber_v$_:
- vxor v$_, v$_, v$_
- blr
-.size abi_test_clobber_v$_,.-abi_test_clobber_v$_
-____
-}
-
-foreach (0..7) {
- # PPC orders CR fields in big-endian, so the mask is reversed from what one
- # would expect.
- my $mask = 1 << (7 - $_);
- $code .= <<____;
-.globl abi_test_clobber_cr$_
-.align 4
-abi_test_clobber_cr$_:
- # Flip the bits on cr$_ rather than setting to zero. With a four-bit
- # register, zeroing it will do nothing 1 in 16 times.
- mfcr r0
- not r0, r0
- mtcrf $mask, r0
- blr
-.size abi_test_clobber_cr$_,.-abi_test_clobber_cr$_
-____
-}
-
-$code .= <<____;
-.globl abi_test_clobber_ctr
-.align 4
-abi_test_clobber_ctr:
- li r0, 0
- mtctr r0
- blr
-.size abi_test_clobber_ctr,.-abi_test_clobber_ctr
-
-.globl abi_test_clobber_lr
-.align 4
-abi_test_clobber_lr:
- mflr r0
- mtctr r0
- li r0, 0
- mtlr r0
- bctr
-.size abi_test_clobber_lr,.-abi_test_clobber_lr
-
-____
-
-print $code;
-close STDOUT or die "error closing STDOUT: $!";
diff --git a/util/BUILD.toplevel b/util/BUILD.toplevel
index e0d3148..c314389 100644
--- a/util/BUILD.toplevel
+++ b/util/BUILD.toplevel
@@ -24,7 +24,6 @@
"crypto_sources_apple_x86_64",
"crypto_sources_linux_aarch64",
"crypto_sources_linux_arm",
- "crypto_sources_linux_ppc64le",
"crypto_sources_linux_x86",
"crypto_sources_linux_x86_64",
"fips_fragments",
@@ -65,14 +64,6 @@
]
]
-config_setting(
- name = "linux_ppc64le",
- constraint_values = [
- "@platforms//os:linux",
- "@platforms//cpu:ppc",
- ],
-)
-
posix_copts = [
# Assembler option --noexecstack adds .note.GNU-stack to each object to
# ensure that binaries can be built with non-executable stack.
@@ -110,7 +101,6 @@
# These selects must be kept in sync.
crypto_sources_asm = select({
- ":linux_ppc64le": crypto_sources_linux_ppc64le,
":linux_armv7": crypto_sources_linux_arm,
":linux_arm64": crypto_sources_linux_aarch64,
":linux_x86_32": crypto_sources_linux_x86,
@@ -138,7 +128,6 @@
"//conditions:default": [],
})
boringssl_copts += select({
- ":linux_ppc64le": [],
":linux_armv7": [],
":linux_arm64": [],
":linux_x86_32": [],
diff --git a/util/fipstools/delocate/delocate.peg b/util/fipstools/delocate/delocate.peg
index 0ffecea..6ec5f7a 100644
--- a/util/fipstools/delocate/delocate.peg
+++ b/util/fipstools/delocate/delocate.peg
@@ -12,7 +12,7 @@
# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
-# This is a rough parser for x86-64 and ppc64le assembly designed to work with
+# This is a rough parser for x86-64 and aarch64 assembly designed to work with
# https://github.com/pointlander/peg. delocate.go has a go:generate line for
# rebuilding delocate.peg.go from this file.
diff --git a/util/generate_build_files.py b/util/generate_build_files.py
index 4a93a7f..c319a55 100644
--- a/util/generate_build_files.py
+++ b/util/generate_build_files.py
@@ -35,7 +35,6 @@
('apple', 'x86_64', 'macosx', [], 'S'),
('linux', 'arm', 'linux32', [], 'S'),
('linux', 'aarch64', 'linux64', [], 'S'),
- ('linux', 'ppc64le', 'linux64le', [], 'S'),
('linux', 'x86', 'elf', ['-fPIC', '-DOPENSSL_IA32_SSE2'], 'S'),
('linux', 'x86_64', 'elf', [], 'S'),
('win', 'x86', 'win32n', ['-DOPENSSL_IA32_SSE2'], 'asm'),
@@ -142,7 +141,7 @@
if asm_outputs:
blueprint.write(' target: {\n')
for ((osname, arch), asm_files) in asm_outputs:
- if osname != 'linux' or arch == 'ppc64le':
+ if osname != 'linux':
continue
if arch == 'aarch64':
arch = 'arm64'
@@ -480,8 +479,6 @@
elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "mips")
# Just to avoid the “unknown processor” error.
set(ARCH "generic")
-elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "ppc64le")
- set(ARCH "ppc64le")
else()
message(FATAL_ERROR "Unknown processor:" ${CMAKE_SYSTEM_PROCESSOR})
endif()