Sync sha512-armv8.pl up to 753316232243ccbf86b96c1c51ffcb41651d9ad5.
This imports 753316232243ccbf86b96c1c51ffcb41651d9ad5,
46f4e1bec51dc96fa275c168752aa34359d9ee51, and
32bbb62ea634239e7cb91d6450ba23517082bab6.
The last commit fixes a detection of big-endian aarch64 in the kernel,
which we do not support at all, but is imported to reduce the upstream
diff. Though it points out a messy part of arm_arch.h: __ARMEL__ and
__ARMEB__ are specific to 32-bit ARM. __AARCH64EB__ and __AARCH64EL__
are the 64-bit ones. But OpenSSL's arm_arch.h defines __ARME[LB]__ for
aarch64 and uses it in perlasm. We should fix the files upstream to
look at the aarch64 ones. (Indeed our own base.h assumes __ARMEL__
implies 32-bit ARM.)
Change-Id: I6c2241e103a97e8c3599cdfa43dcc6f30d4a2581
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/50806
Reviewed-by: Adam Langley <agl@google.com>
Commit-Queue: David Benjamin <davidben@google.com>
diff --git a/crypto/fipsmodule/sha/asm/sha512-armv8.pl b/crypto/fipsmodule/sha/asm/sha512-armv8.pl
index 01154c2..e961312 100644
--- a/crypto/fipsmodule/sha/asm/sha512-armv8.pl
+++ b/crypto/fipsmodule/sha/asm/sha512-armv8.pl
@@ -1,5 +1,5 @@
#! /usr/bin/env perl
-# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
@@ -27,6 +27,7 @@
# Denver 2.01 10.5 (+26%) 6.70 (+8%)
# X-Gene 20.0 (+100%) 12.8 (+300%(***))
# Mongoose 2.36 13.0 (+50%) 8.36 (+33%)
+# Kryo 1.92 17.4 (+30%) 11.2 (+8%)
#
# (*) Software SHA256 results are of lesser relevance, presented
# mostly for informational purposes.
@@ -35,7 +36,7 @@
# on Cortex-A53 (or by 4 cycles per round).
# (***) Super-impressive coefficients over gcc-generated code are
# indication of some compiler "pathology", most notably code
-# generated with -mgeneral-regs-only is significanty faster
+# generated with -mgeneral-regs-only is significantly faster
# and the gap is only 40-90%.
$output=pop;
@@ -89,7 +90,7 @@
$T0=@X[$i+3] if ($i<11);
$code.=<<___ if ($i<16);
-#ifndef __ARMEB__
+#ifndef __AARCH64EB__
rev @X[$i],@X[$i] // $i
#endif
___