Add benchmarks for hash functions to bssl speed.
diff --git a/crypto/sha/asm/sha1-586.pl b/crypto/sha/asm/sha1-586.pl
index acc4f63..81252a6 100644
--- a/crypto/sha/asm/sha1-586.pl
+++ b/crypto/sha/asm/sha1-586.pl
@@ -128,6 +128,8 @@
`ml 2>&1` =~ /Version ([0-9]+)\./ &&
$1>=10); # first version supporting AVX
+$shaext=$xmm; ### set to zero if compiling for 1.0.1
+
&external_label("OPENSSL_ia32cap_P") if ($xmm);
@@ -307,7 +309,7 @@
&function_begin("sha1_block_data_order");
if ($xmm) {
- &static_label("shaext_shortcut");
+ &static_label("shaext_shortcut") if ($shaext);
&static_label("ssse3_shortcut");
&static_label("avx_shortcut") if ($ymm);
&static_label("K_XX_XX");
@@ -325,8 +327,10 @@
&mov ($C,&DWP(8,$T));
&test ($A,1<<24); # check FXSR bit
&jz (&label("x86"));
- &test ($C,1<<29); # check SHA bit
- &jnz (&label("shaext_shortcut"));
+ if ($shaext) {
+ &test ($C,1<<29); # check SHA bit
+ &jnz (&label("shaext_shortcut"));
+ }
if ($ymm) {
&and ($D,1<<28); # mask AVX bit
&and ($A,1<<30); # mask "Intel CPU" bit
@@ -405,7 +409,7 @@
&function_end("sha1_block_data_order");
if ($xmm) {
-{
+if ($shaext) {
######################################################################
# Intel SHA Extensions implementation of SHA1 update function.
#
diff --git a/crypto/sha/asm/sha1-x86_64.pl b/crypto/sha/asm/sha1-x86_64.pl
index ea288c1..01010cf 100644
--- a/crypto/sha/asm/sha1-x86_64.pl
+++ b/crypto/sha/asm/sha1-x86_64.pl
@@ -107,6 +107,9 @@
$avx = ($1>=10) + ($1>=11);
}
+$shaext=0; ### set to zero if compiling for 1.0.1
+$avx=1 if (!$shaext && $avx);
+
open OUT,"| \"$^X\" $xlate $flavour $output";
*STDOUT=*OUT;
@@ -245,7 +248,8 @@
mov OPENSSL_ia32cap_P+8(%rip),%r10d
test \$`1<<9`,%r8d # check SSSE3 bit
jz .Lialu
-
+___
+$code.=<<___ if ($shaext);
test \$`1<<29`,%r10d # check SHA bit
jnz _shaext_shortcut
___
@@ -321,7 +325,7 @@
ret
.size sha1_block_data_order,.-sha1_block_data_order
___
-{{{
+if ($shaext) {{{
######################################################################
# Intel SHA Extensions implementation of SHA1 update function.
#
@@ -1956,9 +1960,13 @@
.rva .LSEH_begin_sha1_block_data_order
.rva .LSEH_end_sha1_block_data_order
.rva .LSEH_info_sha1_block_data_order
+___
+$code.=<<___ if ($shaext);
.rva .LSEH_begin_sha1_block_data_order_shaext
.rva .LSEH_end_sha1_block_data_order_shaext
.rva .LSEH_info_sha1_block_data_order_shaext
+___
+$code.=<<___;
.rva .LSEH_begin_sha1_block_data_order_ssse3
.rva .LSEH_end_sha1_block_data_order_ssse3
.rva .LSEH_info_sha1_block_data_order_ssse3
diff --git a/crypto/sha/asm/sha256-586.pl b/crypto/sha/asm/sha256-586.pl
index 09648a8..ee094a9 100644
--- a/crypto/sha/asm/sha256-586.pl
+++ b/crypto/sha/asm/sha256-586.pl
@@ -82,6 +82,8 @@
$avx = ($1>=10) + ($1>=11);
}
+$shaext=$xmm; ### set to zero if compiling for 1.0.1
+
$unroll_after = 64*4; # If pre-evicted from L1P cache first spin of
# fully unrolled loop was measured to run about
# 3-4x slower. If slowdown coefficient is N and
@@ -205,8 +207,8 @@
&jz ($unroll_after?&label("no_xmm"):&label("loop"));
&and ("ecx",1<<30); # mask "Intel CPU" bit
&and ("ebx",1<<28|1<<9); # mask AVX and SSSE3 bits
- &test ("edx",1<<29) if ($xmm); # check for SHA
- &jnz (&label("shaext")) if ($xmm);
+ &test ("edx",1<<29) if ($shaext); # check for SHA
+ &jnz (&label("shaext")) if ($shaext);
&or ("ecx","ebx");
&and ("ecx",1<<28|1<<30);
&cmp ("ecx",1<<28|1<<30);
@@ -505,7 +507,7 @@
&function_end_A();
}
if (!$i386 && $xmm) {{{
-{
+if ($shaext) {
######################################################################
# Intel SHA Extensions implementation of SHA256 update function.
#
diff --git a/crypto/sha/asm/sha512-x86_64.pl b/crypto/sha/asm/sha512-x86_64.pl
index e2253f1..bc5620d 100644
--- a/crypto/sha/asm/sha512-x86_64.pl
+++ b/crypto/sha/asm/sha512-x86_64.pl
@@ -123,6 +123,9 @@
$avx = ($1>=10) + ($1>=11);
}
+$shaext=1; ### set to zero if compiling for 1.0.1
+$avx=1 if (!$shaext && $avx);
+
open OUT,"| \"$^X\" $xlate $flavour";
*STDOUT=*OUT;
@@ -259,7 +262,7 @@
mov 4(%r11),%r10d
mov 8(%r11),%r11d
___
-$code.=<<___ if ($SZ==4);
+$code.=<<___ if ($SZ==4 && $shaext);
test \$`1<<29`,%r11d # check for SHA
jnz _shaext_shortcut
___
@@ -518,7 +521,7 @@
######################################################################
# SIMD code paths
#
-if ($SZ==4) {{{
+if ($SZ==4 && $shaext) {{{
######################################################################
# Intel SHA Extensions implementation of SHA256 update function.
#
@@ -2295,10 +2298,12 @@
.rva .LSEH_end_$func
.rva .LSEH_info_$func
___
-$code.=<<___ if ($SZ==4);
+$code.=<<___ if ($SZ==4 && $shaext);
.rva .LSEH_begin_${func}_shaext
.rva .LSEH_end_${func}_shaext
.rva .LSEH_info_${func}_shaext
+___
+$code.=<<___ if ($SZ==4);
.rva .LSEH_begin_${func}_ssse3
.rva .LSEH_end_${func}_ssse3
.rva .LSEH_info_${func}_ssse3
diff --git a/tool/speed.cc b/tool/speed.cc
index 706d4a2..176e2e2 100644
--- a/tool/speed.cc
+++ b/tool/speed.cc
@@ -22,6 +22,7 @@
#include <openssl/aead.h>
#include <openssl/bio.h>
+#include <openssl/digest.h>
#include <openssl/obj.h>
#include <openssl/rsa.h>
@@ -198,6 +199,41 @@
SpeedAEADChunk(aead, name + " (8192 bytes)", 8192);
}
+static bool SpeedHashChunk(const EVP_MD *md, const std::string &name,
+                           size_t chunk_len) {
+  uint8_t scratch[8192];  // input buffer; bounds the largest chunk we can time
+
+  if (chunk_len > sizeof(scratch)) {
+    return false;  // reject oversized requests before allocating anything
+  }
+
+  EVP_MD_CTX *ctx = EVP_MD_CTX_create();  // create only after the size check (was leaked on early return)
+  TimeResults results;
+  if (!TimeFunction(&results, [ctx, md, chunk_len, &scratch]() -> bool {
+        uint8_t digest[EVP_MAX_MD_SIZE];
+        unsigned int md_len;
+
+        return EVP_DigestInit_ex(ctx, md, NULL /* ENGINE */) &&
+               EVP_DigestUpdate(ctx, scratch, chunk_len) &&
+               EVP_DigestFinal_ex(ctx, digest, &md_len);
+      })) {
+    fprintf(stderr, "EVP_DigestInit_ex failed.\n");
+    BIO_print_errors_fp(stderr);
+    EVP_MD_CTX_destroy(ctx);  // ctx was leaked on this failure path
+    return false;
+  }
+
+  results.PrintWithBytes(name, chunk_len);
+
+  EVP_MD_CTX_destroy(ctx);
+  return true;
+}
+static bool SpeedHash(const EVP_MD *md, const std::string &name) {  // Times |md| at three chunk sizes; && stops at the first failure.
+  return SpeedHashChunk(md, name + " (16 bytes)", 16) &&
+         SpeedHashChunk(md, name + " (256 bytes)", 256) &&
+         SpeedHashChunk(md, name + " (8192 bytes)", 8192);
+}
+
bool Speed(const std::vector<std::string> &args) {
const uint8_t *inp;
@@ -231,7 +267,10 @@
if (!SpeedAEAD(EVP_aead_aes_128_gcm(), "AES-128-GCM") ||
!SpeedAEAD(EVP_aead_aes_256_gcm(), "AES-256-GCM") ||
- !SpeedAEAD(EVP_aead_chacha20_poly1305(), "ChaCha20-Poly1305")) {
+ !SpeedAEAD(EVP_aead_chacha20_poly1305(), "ChaCha20-Poly1305") ||
+ !SpeedHash(EVP_sha1(), "SHA-1") ||
+ !SpeedHash(EVP_sha256(), "SHA-256") ||
+ !SpeedHash(EVP_sha512(), "SHA-512")) {
return false;
}