x86_64 assembly pack: "optimize" for Knights Landing, add AVX-512 results.
The changes to the assembly files are synced from upstream's
64d92d74985ebb3d0be58a9718f9e080a14a8e7f. cpu-intel.c is translated to C
from that commit and d84df594404ebbd71d21fec5526178d935e4d88d.
Change-Id: I02c8f83aa4780df301c21f011ef2d8d8300e2f2a
Reviewed-on: https://boringssl-review.googlesource.com/18411
Commit-Queue: Adam Langley <agl@google.com>
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/fipsmodule/sha/asm/sha1-x86_64.pl b/crypto/fipsmodule/sha/asm/sha1-x86_64.pl
index b269e84..f5bc2e6 100755
--- a/crypto/fipsmodule/sha/asm/sha1-x86_64.pl
+++ b/crypto/fipsmodule/sha/asm/sha1-x86_64.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2006-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -75,9 +82,11 @@
# Haswell 5.45 4.15/+31% 3.57/+53%
# Skylake 5.18 4.06/+28% 3.54/+46%
# Bulldozer 9.11 5.95/+53%
+# Ryzen 4.75 3.80/+24% 1.93/+150%(**)
# VIA Nano 9.32 7.15/+30%
# Atom 10.3 9.17/+12%
# Silvermont 13.1(*) 9.37/+40%
+# Knights L 13.2(*) 9.68/+36% 8.30/+59%
# Goldmont 8.13 6.42/+27% 1.70/+380%(**)
#
# (*) obviously suboptimal result, nothing was done about it,
@@ -537,7 +546,7 @@
$code .= "\t$opcode\t".join(',',$arg,reverse @_)."\n";
}
-sub Xupdate_ssse3_16_31() # recall that $Xi starts wtih 4
+sub Xupdate_ssse3_16_31() # recall that $Xi starts with 4
{ use integer;
my $body = shift;
my @insns = (&$body,&$body,&$body,&$body); # 40 instructions
@@ -1458,7 +1467,7 @@
)
}
-sub Xupdate_avx2_16_31() # recall that $Xi starts wtih 4
+sub Xupdate_avx2_16_31() # recall that $Xi starts with 4
{ use integer;
my $body = shift;
my @insns = (&$body,&$body,&$body,&$body,&$body); # 35 instructions
diff --git a/crypto/fipsmodule/sha/asm/sha512-x86_64.pl b/crypto/fipsmodule/sha/asm/sha512-x86_64.pl
index 7ad7491..e62ad75 100755
--- a/crypto/fipsmodule/sha/asm/sha512-x86_64.pl
+++ b/crypto/fipsmodule/sha/asm/sha512-x86_64.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -88,9 +95,11 @@
# Haswell 12.2 9.28(+31%) 7.80(+56%) 7.66 5.40(+42%)
# Skylake 11.4 9.03(+26%) 7.70(+48%) 7.25 5.20(+40%)
# Bulldozer 21.1 13.6(+54%) 13.6(+54%(***)) 13.5 8.58(+57%)
+# Ryzen 11.0 9.02(+22%) 2.05(+440%) 7.05 5.67(+20%)
# VIA Nano 23.0 16.5(+39%) - 14.7 -
# Atom 23.0 18.9(+22%) - 14.7 -
# Silvermont 27.4 20.6(+33%) - 17.5 -
+# Knights L 27.4 21.0(+30%) 19.6(+40%) 17.5 12.8(+37%)
# Goldmont 18.9 14.3(+32%) 4.16(+350%) 12.0 -
#
# (*) whichever best applicable, including SHAEXT;
@@ -311,7 +320,6 @@
mov $SZ*5($ctx),$F
mov $SZ*6($ctx),$G
mov $SZ*7($ctx),$H
-
jmp .Lloop
.align 16