Move capability checks in sha1-586.pl to C

sha256-586.pl and sha512-586.pl have their own unique challenges, so I'll do them separately.

Bug: 673
Change-Id: Ic9be0454fddf75e7f49bcccd8a86a4ff8862ff67
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/65872
Commit-Queue: David Benjamin <davidben@google.com>
Reviewed-by: Bob Beck <bbe@google.com>
diff --git a/crypto/fipsmodule/sha/asm/sha1-586.pl b/crypto/fipsmodule/sha/asm/sha1-586.pl index 7952636..4be06e7 100644 --- a/crypto/fipsmodule/sha/asm/sha1-586.pl +++ b/crypto/fipsmodule/sha/asm/sha1-586.pl
@@ -141,8 +141,6 @@ # been tested. $shaext = 0; -&external_label("OPENSSL_ia32cap_P") if ($xmm); - $A="eax"; $B="ebx"; @@ -318,40 +316,9 @@ } } -&function_begin("sha1_block_data_order"); -if ($xmm) { - &static_label("shaext_shortcut") if ($shaext); - &static_label("ssse3_shortcut"); - &static_label("avx_shortcut") if ($ymm); - &static_label("K_XX_XX"); +&static_label("K_XX_XX"); - &call (&label("pic_point")); # make it PIC! - &set_label("pic_point"); - &blindpop($tmp1); - &picmeup($T,"OPENSSL_ia32cap_P",$tmp1,&label("pic_point")); - &lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1)); - - &mov ($A,&DWP(0,$T)); - &mov ($D,&DWP(4,$T)); - &test ($D,1<<9); # check SSSE3 bit - &jz (&label("x86")); - &mov ($C,&DWP(8,$T)); - &test ($A,1<<24); # check FXSR bit - &jz (&label("x86")); - if ($shaext) { - &test ($C,1<<29); # check SHA bit - &jnz (&label("shaext_shortcut")); - } - if ($ymm) { - &and ($D,1<<28); # mask AVX bit - &and ($A,1<<30); # mask "Intel CPU" bit - &or ($A,$D); - &cmp ($A,1<<28|1<<30); - &je (&label("avx_shortcut")); - } - &jmp (&label("ssse3_shortcut")); - &set_label("x86",16); -} +&function_begin("sha1_block_data_order_nohw"); &mov($tmp1,&wparam(0)); # SHA_CTX *c &mov($T,&wparam(1)); # const void *input &mov($A,&wparam(2)); # size_t num @@ -417,7 +384,7 @@ &jb(&label("loop")); &stack_pop(16+3); -&function_end("sha1_block_data_order"); +&function_end("sha1_block_data_order_nohw"); if ($xmm) { if ($shaext) { @@ -442,12 +409,11 @@ sub sha1msg1 { sha1op38(0xc9,@_); } sub sha1msg2 { sha1op38(0xca,@_); } -&function_begin("_sha1_block_data_order_shaext"); +&function_begin("sha1_block_data_order_shaext"); &call (&label("pic_point")); # make it PIC! 
&set_label("pic_point"); &blindpop($tmp1); &lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1)); -&set_label("shaext_shortcut"); &mov ($ctx,&wparam(0)); &mov ("ebx","esp"); &mov ($inp,&wparam(1)); @@ -529,7 +495,7 @@ &movdqu (&QWP(0,$ctx),$ABCD) &movd (&DWP(16,$ctx),$E); &mov ("esp","ebx"); -&function_end("_sha1_block_data_order_shaext"); +&function_end("sha1_block_data_order_shaext"); } ###################################################################### # The SSSE3 implementation. @@ -565,12 +531,11 @@ my $_rol=sub { &rol(@_) }; my $_ror=sub { &ror(@_) }; -&function_begin("_sha1_block_data_order_ssse3"); +&function_begin("sha1_block_data_order_ssse3"); &call (&label("pic_point")); # make it PIC! &set_label("pic_point"); &blindpop($tmp1); &lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1)); -&set_label("ssse3_shortcut"); &movdqa (@X[3],&QWP(0,$tmp1)); # K_00_19 &movdqa (@X[4],&QWP(16,$tmp1)); # K_20_39 @@ -1093,7 +1058,7 @@ &mov (&DWP(12,@T[1]),$D); &mov (&DWP(16,@T[1]),$E); -&function_end("_sha1_block_data_order_ssse3"); +&function_end("sha1_block_data_order_ssse3"); $rx=0; # reset @@ -1108,12 +1073,11 @@ my $_rol=sub { &shld(@_[0],@_) }; my $_ror=sub { &shrd(@_[0],@_) }; -&function_begin("_sha1_block_data_order_avx"); +&function_begin("sha1_block_data_order_avx"); &call (&label("pic_point")); # make it PIC! &set_label("pic_point"); &blindpop($tmp1); &lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1)); -&set_label("avx_shortcut"); &vzeroall(); &vmovdqa(@X[3],&QWP(0,$tmp1)); # K_00_19 @@ -1466,7 +1430,7 @@ &mov (&DWP(8,@T[1]),$C); &mov (&DWP(12,@T[1]),$D); &mov (&DWP(16,@T[1]),$E); -&function_end("_sha1_block_data_order_avx"); +&function_end("sha1_block_data_order_avx"); } &set_label("K_XX_XX",64); &data_word(0x5a827999,0x5a827999,0x5a827999,0x5a827999); # K_00_19
diff --git a/crypto/fipsmodule/sha/internal.h b/crypto/fipsmodule/sha/internal.h index 4a2f081..7082e64 100644 --- a/crypto/fipsmodule/sha/internal.h +++ b/crypto/fipsmodule/sha/internal.h
@@ -26,20 +26,7 @@ // Define SHA{n}[_{variant}]_ASM if sha{n}_block_data_order[_{variant}] is // defined in assembly. -#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) - -#define SHA1_ASM -#define SHA256_ASM -#define SHA512_ASM - -void sha1_block_data_order(uint32_t state[5], const uint8_t *data, - size_t num_blocks); -void sha256_block_data_order(uint32_t state[8], const uint8_t *data, - size_t num_blocks); -void sha512_block_data_order(uint64_t state[8], const uint8_t *data, - size_t num_blocks); - -#elif !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) #define SHA1_ASM_NOHW #define SHA256_ASM_NOHW @@ -89,6 +76,41 @@ return CRYPTO_is_ARMv8_SHA512_capable(); } +#elif !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) + +#define SHA1_ASM_NOHW + +#define SHA1_ASM_SSSE3 +OPENSSL_INLINE int sha1_ssse3_capable(void) { + // TODO(davidben): Do we need to check the FXSR bit? The Intel manual does not + // say to. + return CRYPTO_is_SSSE3_capable() && CRYPTO_is_FXSR_capable(); +} +void sha1_block_data_order_ssse3(uint32_t state[5], const uint8_t *data, + size_t num); + +#define SHA1_ASM_AVX +OPENSSL_INLINE int sha1_avx_capable(void) { + // Pre-Zen AMD CPUs had slow SHLD/SHRD; Zen added the SHA extension; see the + // discussion in sha1-586.pl. + // + // TODO(davidben): Should we enable SHAEXT on 32-bit x86? + // TODO(davidben): Do we need to check the FXSR bit? The Intel manual does not + // say to. + return CRYPTO_is_AVX_capable() && CRYPTO_is_intel_cpu() && + CRYPTO_is_FXSR_capable(); +} +void sha1_block_data_order_avx(uint32_t state[5], const uint8_t *data, + size_t num); + +// TODO(crbug.com/boringssl/673): Move the remaining CPU dispatch to C. 
+#define SHA256_ASM +#define SHA512_ASM +void sha256_block_data_order(uint32_t state[8], const uint8_t *data, + size_t num_blocks); +void sha512_block_data_order(uint64_t state[8], const uint8_t *data, + size_t num_blocks); + #elif !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) #define SHA1_ASM_NOHW