Sync sha512-armv8.pl up to 753316232243ccbf86b96c1c51ffcb41651d9ad5. This imports 753316232243ccbf86b96c1c51ffcb41651d9ad5, 46f4e1bec51dc96fa275c168752aa34359d9ee51, and 32bbb62ea634239e7cb91d6450ba23517082bab6. The last commit fixes the detection of big-endian aarch64 in the kernel, a configuration we do not support at all, but it is imported to reduce the upstream diff. It does, however, point out a messy part of arm_arch.h: __ARMEL__ and __ARMEB__ are specific to 32-bit ARM, while __AARCH64EB__ and __AARCH64EL__ are the 64-bit equivalents. But OpenSSL's arm_arch.h defines __ARME[LB]__ for aarch64 as well and uses them in perlasm. We should fix the files upstream to check the aarch64 macros instead. (Indeed our own base.h assumes __ARMEL__ implies 32-bit ARM.) Change-Id: I6c2241e103a97e8c3599cdfa43dcc6f30d4a2581 Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/50806 Reviewed-by: Adam Langley <agl@google.com> Commit-Queue: David Benjamin <davidben@google.com>
diff --git a/crypto/fipsmodule/sha/asm/sha512-armv8.pl b/crypto/fipsmodule/sha/asm/sha512-armv8.pl index 01154c2..e961312 100644 --- a/crypto/fipsmodule/sha/asm/sha512-armv8.pl +++ b/crypto/fipsmodule/sha/asm/sha512-armv8.pl
@@ -1,5 +1,5 @@ #! /usr/bin/env perl -# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved. +# Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved. # # Licensed under the OpenSSL license (the "License"). You may not use # this file except in compliance with the License. You can obtain a copy @@ -27,6 +27,7 @@ # Denver 2.01 10.5 (+26%) 6.70 (+8%) # X-Gene 20.0 (+100%) 12.8 (+300%(***)) # Mongoose 2.36 13.0 (+50%) 8.36 (+33%) +# Kryo 1.92 17.4 (+30%) 11.2 (+8%) # # (*) Software SHA256 results are of lesser relevance, presented # mostly for informational purposes. @@ -35,7 +36,7 @@ # on Cortex-A53 (or by 4 cycles per round). # (***) Super-impressive coefficients over gcc-generated code are # indication of some compiler "pathology", most notably code -# generated with -mgeneral-regs-only is significanty faster +# generated with -mgeneral-regs-only is significantly faster # and the gap is only 40-90%. $output=pop; @@ -89,7 +90,7 @@ $T0=@X[$i+3] if ($i<11); $code.=<<___ if ($i<16); -#ifndef __ARMEB__ +#ifndef __AARCH64EB__ rev @X[$i],@X[$i] // $i #endif ___