Move capability checks in sha1-586.pl to C
sha256-586.pl and sha512-586.pl have their own unique challenges, so
I'll do them separately.
Bug: 673
Change-Id: Ic9be0454fddf75e7f49bcccd8a86a4ff8862ff67
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/65872
Commit-Queue: David Benjamin <davidben@google.com>
Reviewed-by: Bob Beck <bbe@google.com>
diff --git a/crypto/fipsmodule/sha/asm/sha1-586.pl b/crypto/fipsmodule/sha/asm/sha1-586.pl
index 7952636..4be06e7 100644
--- a/crypto/fipsmodule/sha/asm/sha1-586.pl
+++ b/crypto/fipsmodule/sha/asm/sha1-586.pl
@@ -141,8 +141,6 @@
# been tested.
$shaext = 0;
-&external_label("OPENSSL_ia32cap_P") if ($xmm);
-
$A="eax";
$B="ebx";
@@ -318,40 +316,9 @@
}
}
-&function_begin("sha1_block_data_order");
-if ($xmm) {
- &static_label("shaext_shortcut") if ($shaext);
- &static_label("ssse3_shortcut");
- &static_label("avx_shortcut") if ($ymm);
- &static_label("K_XX_XX");
+&static_label("K_XX_XX");
- &call (&label("pic_point")); # make it PIC!
- &set_label("pic_point");
- &blindpop($tmp1);
- &picmeup($T,"OPENSSL_ia32cap_P",$tmp1,&label("pic_point"));
- &lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1));
-
- &mov ($A,&DWP(0,$T));
- &mov ($D,&DWP(4,$T));
- &test ($D,1<<9); # check SSSE3 bit
- &jz (&label("x86"));
- &mov ($C,&DWP(8,$T));
- &test ($A,1<<24); # check FXSR bit
- &jz (&label("x86"));
- if ($shaext) {
- &test ($C,1<<29); # check SHA bit
- &jnz (&label("shaext_shortcut"));
- }
- if ($ymm) {
- &and ($D,1<<28); # mask AVX bit
- &and ($A,1<<30); # mask "Intel CPU" bit
- &or ($A,$D);
- &cmp ($A,1<<28|1<<30);
- &je (&label("avx_shortcut"));
- }
- &jmp (&label("ssse3_shortcut"));
- &set_label("x86",16);
-}
+&function_begin("sha1_block_data_order_nohw");
&mov($tmp1,&wparam(0)); # SHA_CTX *c
&mov($T,&wparam(1)); # const void *input
&mov($A,&wparam(2)); # size_t num
@@ -417,7 +384,7 @@
&jb(&label("loop"));
&stack_pop(16+3);
-&function_end("sha1_block_data_order");
+&function_end("sha1_block_data_order_nohw");
if ($xmm) {
if ($shaext) {
@@ -442,12 +409,11 @@
sub sha1msg1 { sha1op38(0xc9,@_); }
sub sha1msg2 { sha1op38(0xca,@_); }
-&function_begin("_sha1_block_data_order_shaext");
+&function_begin("sha1_block_data_order_shaext");
&call (&label("pic_point")); # make it PIC!
&set_label("pic_point");
&blindpop($tmp1);
&lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1));
-&set_label("shaext_shortcut");
&mov ($ctx,&wparam(0));
&mov ("ebx","esp");
&mov ($inp,&wparam(1));
@@ -529,7 +495,7 @@
&movdqu (&QWP(0,$ctx),$ABCD)
&movd (&DWP(16,$ctx),$E);
&mov ("esp","ebx");
-&function_end("_sha1_block_data_order_shaext");
+&function_end("sha1_block_data_order_shaext");
}
######################################################################
# The SSSE3 implementation.
@@ -565,12 +531,11 @@
my $_rol=sub { &rol(@_) };
my $_ror=sub { &ror(@_) };
-&function_begin("_sha1_block_data_order_ssse3");
+&function_begin("sha1_block_data_order_ssse3");
&call (&label("pic_point")); # make it PIC!
&set_label("pic_point");
&blindpop($tmp1);
&lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1));
-&set_label("ssse3_shortcut");
&movdqa (@X[3],&QWP(0,$tmp1)); # K_00_19
&movdqa (@X[4],&QWP(16,$tmp1)); # K_20_39
@@ -1093,7 +1058,7 @@
&mov (&DWP(12,@T[1]),$D);
&mov (&DWP(16,@T[1]),$E);
-&function_end("_sha1_block_data_order_ssse3");
+&function_end("sha1_block_data_order_ssse3");
$rx=0; # reset
@@ -1108,12 +1073,11 @@
my $_rol=sub { &shld(@_[0],@_) };
my $_ror=sub { &shrd(@_[0],@_) };
-&function_begin("_sha1_block_data_order_avx");
+&function_begin("sha1_block_data_order_avx");
&call (&label("pic_point")); # make it PIC!
&set_label("pic_point");
&blindpop($tmp1);
&lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1));
-&set_label("avx_shortcut");
&vzeroall();
&vmovdqa(@X[3],&QWP(0,$tmp1)); # K_00_19
@@ -1466,7 +1430,7 @@
&mov (&DWP(8,@T[1]),$C);
&mov (&DWP(12,@T[1]),$D);
&mov (&DWP(16,@T[1]),$E);
-&function_end("_sha1_block_data_order_avx");
+&function_end("sha1_block_data_order_avx");
}
&set_label("K_XX_XX",64);
&data_word(0x5a827999,0x5a827999,0x5a827999,0x5a827999); # K_00_19
diff --git a/crypto/fipsmodule/sha/internal.h b/crypto/fipsmodule/sha/internal.h
index 4a2f081..7082e64 100644
--- a/crypto/fipsmodule/sha/internal.h
+++ b/crypto/fipsmodule/sha/internal.h
@@ -26,20 +26,7 @@
// Define SHA{n}[_{variant}]_ASM if sha{n}_block_data_order[_{variant}] is
// defined in assembly.
-#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86)
-
-#define SHA1_ASM
-#define SHA256_ASM
-#define SHA512_ASM
-
-void sha1_block_data_order(uint32_t state[5], const uint8_t *data,
- size_t num_blocks);
-void sha256_block_data_order(uint32_t state[8], const uint8_t *data,
- size_t num_blocks);
-void sha512_block_data_order(uint64_t state[8], const uint8_t *data,
- size_t num_blocks);
-
-#elif !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM)
+#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM)
#define SHA1_ASM_NOHW
#define SHA256_ASM_NOHW
@@ -89,6 +76,41 @@
return CRYPTO_is_ARMv8_SHA512_capable();
}
+#elif !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86)
+
+#define SHA1_ASM_NOHW
+
+#define SHA1_ASM_SSSE3
+OPENSSL_INLINE int sha1_ssse3_capable(void) {
+ // TODO(davidben): Do we need to check the FXSR bit? The Intel manual does not
+ // say to.
+ return CRYPTO_is_SSSE3_capable() && CRYPTO_is_FXSR_capable();
+}
+void sha1_block_data_order_ssse3(uint32_t state[5], const uint8_t *data,
+ size_t num);
+
+#define SHA1_ASM_AVX
+OPENSSL_INLINE int sha1_avx_capable(void) {
+ // Intel-only: the AVX code rotates with SHLD/SHRD, which were slow on pre-Zen
+ // AMD CPUs; Zen added the SHA extension. See the discussion in sha1-586.pl.
+ //
+ // TODO(davidben): Should we enable SHAEXT on 32-bit x86?
+ // TODO(davidben): Do we need to check the FXSR bit? The Intel manual does not
+ // say to.
+ return CRYPTO_is_AVX_capable() && CRYPTO_is_intel_cpu() &&
+ CRYPTO_is_FXSR_capable();
+}
+void sha1_block_data_order_avx(uint32_t state[5], const uint8_t *data,
+ size_t num);
+
+// TODO(crbug.com/boringssl/673): Move the remaining CPU dispatch to C.
+#define SHA256_ASM
+#define SHA512_ASM
+void sha256_block_data_order(uint32_t state[8], const uint8_t *data,
+ size_t num_blocks);
+void sha512_block_data_order(uint64_t state[8], const uint8_t *data,
+ size_t num_blocks);
+
#elif !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64)
#define SHA1_ASM_NOHW