Move capability checks in chacha-x86.pl to C
Bug: 673
Change-Id: I7e213dc1bbb62553499666c1b271d97f8c43a3ce
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/65870
Reviewed-by: Bob Beck <bbe@google.com>
Commit-Queue: David Benjamin <davidben@google.com>
diff --git a/crypto/chacha/asm/chacha-x86.pl b/crypto/chacha/asm/chacha-x86.pl
index ec23a51..ae477fc 100755
--- a/crypto/chacha/asm/chacha-x86.pl
+++ b/crypto/chacha/asm/chacha-x86.pl
@@ -114,26 +114,10 @@
($d,$d_)=($d_,$d);
}
-&static_label("ssse3_shortcut");
&static_label("ssse3_data");
&static_label("pic_point");
-&function_begin("ChaCha20_ctr32");
- &xor ("eax","eax");
- &cmp ("eax",&wparam(2)); # len==0?
- &je (&label("no_data"));
-if ($xmm) {
- &call (&label("pic_point"));
-&set_label("pic_point");
- &blindpop("eax");
- &picmeup("ebp","OPENSSL_ia32cap_P","eax",&label("pic_point"));
- &test (&DWP(0,"ebp"),1<<24); # test FXSR bit
- &jz (&label("x86"));
- &test (&DWP(4,"ebp"),1<<9); # test SSSE3 bit
- &jz (&label("x86"));
- &jmp (&label("ssse3_shortcut"));
-&set_label("x86");
-}
+&function_begin("ChaCha20_ctr32_nohw");
&mov ("esi",&wparam(3)); # key
&mov ("edi",&wparam(4)); # counter and nonce
@@ -355,8 +339,7 @@
&set_label("done");
&stack_pop(33);
-&set_label("no_data");
-&function_end("ChaCha20_ctr32");
+&function_end("ChaCha20_ctr32_nohw");
if ($xmm) {
my ($xa,$xa_,$xb,$xb_,$xc,$xc_,$xd,$xd_)=map("xmm$_",(0..7));
@@ -428,8 +411,11 @@
($xd,$xd_)=($xd_,$xd);
}
-&function_begin("ChaCha20_ssse3");
-&set_label("ssse3_shortcut");
+&function_begin("ChaCha20_ctr32_ssse3");
+ &call (&label("pic_point"));
+&set_label("pic_point");
+ &blindpop("eax");
+
&mov ($out,&wparam(0));
&mov ($inp,&wparam(1));
&mov ($len,&wparam(2));
@@ -751,7 +737,7 @@
}
&set_label("done");
&mov ("esp",&DWP(512,"esp"));
-&function_end("ChaCha20_ssse3");
+&function_end("ChaCha20_ctr32_ssse3");
&align (64);
&set_label("ssse3_data");
diff --git a/crypto/chacha/chacha.c b/crypto/chacha/chacha.c
index 68c0c5d..57ceab6 100644
--- a/crypto/chacha/chacha.c
+++ b/crypto/chacha/chacha.c
@@ -93,7 +93,7 @@
}
#endif
-#if defined(CHACHA20_ASM) || defined(CHACHA20_ASM_NOHW)
+#if defined(CHACHA20_ASM_NOHW)
void CRYPTO_chacha_20(uint8_t *out, const uint8_t *in, size_t in_len,
const uint8_t key[32], const uint8_t nonce[12],
diff --git a/crypto/chacha/chacha_test.cc b/crypto/chacha/chacha_test.cc
index ff7bfd9..c6456f3 100644
--- a/crypto/chacha/chacha_test.cc
+++ b/crypto/chacha/chacha_test.cc
@@ -351,9 +351,6 @@
static void check_abi(uint8_t *out, const uint8_t *in, size_t in_len,
const uint32_t key[8], const uint32_t counter[4]) {
-#if defined(CHACHA20_ASM)
- CHECK_ABI(ChaCha20_ctr32, out, in, in_len, key, counter);
-#endif
#if defined(CHACHA20_ASM_NEON)
if (ChaCha20_ctr32_neon_capable(in_len)) {
CHECK_ABI(ChaCha20_ctr32_neon, out, in, in_len, key, counter);
diff --git a/crypto/chacha/internal.h b/crypto/chacha/internal.h
index 48eb033..90024c3 100644
--- a/crypto/chacha/internal.h
+++ b/crypto/chacha/internal.h
@@ -31,7 +31,16 @@
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86)
-#define CHACHA20_ASM
+#define CHACHA20_ASM_NOHW
+
+#define CHACHA20_ASM_SSSE3
+OPENSSL_INLINE int ChaCha20_ctr32_ssse3_capable(size_t len) {
+ // Unlike the x86_64 version, the x86 SSSE3 routine runs for all non-zero
+ // lengths.
+ return len > 0 && CRYPTO_is_SSSE3_capable() && CRYPTO_is_FXSR_capable();
+}
+void ChaCha20_ctr32_ssse3(uint8_t *out, const uint8_t *in, size_t in_len,
+ const uint32_t key[8], const uint32_t counter[4]);
#elif !defined(OPENSSL_NO_ASM) && \
(defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64))
@@ -40,7 +49,7 @@
#define CHACHA20_ASM_NEON
OPENSSL_INLINE int ChaCha20_ctr32_neon_capable(size_t len) {
- return (len >= 192) && CRYPTO_is_NEON_capable();
+ return len >= 192 && CRYPTO_is_NEON_capable();
}
void ChaCha20_ctr32_neon(uint8_t *out, const uint8_t *in, size_t in_len,
const uint32_t key[8], const uint32_t counter[4]);
@@ -49,15 +58,15 @@
#define CHACHA20_ASM_AVX2
OPENSSL_INLINE int ChaCha20_ctr32_avx2_capable(size_t len) {
- return (len > 128) && CRYPTO_is_AVX2_capable();
+ return len > 128 && CRYPTO_is_AVX2_capable();
}
void ChaCha20_ctr32_avx2(uint8_t *out, const uint8_t *in, size_t in_len,
const uint32_t key[8], const uint32_t counter[4]);
#define CHACHA20_ASM_SSSE3_4X
OPENSSL_INLINE int ChaCha20_ctr32_ssse3_4x_capable(size_t len) {
- int capable = (len > 128) && CRYPTO_is_SSSE3_capable();
- int faster = (len > 192) || !CRYPTO_cpu_perf_is_like_silvermont();
+ int capable = len > 128 && CRYPTO_is_SSSE3_capable();
+ int faster = len > 192 || !CRYPTO_cpu_perf_is_like_silvermont();
return capable && faster;
}
void ChaCha20_ctr32_ssse3_4x(uint8_t *out, const uint8_t *in, size_t in_len,
@@ -65,27 +74,22 @@
#define CHACHA20_ASM_SSSE3
OPENSSL_INLINE int ChaCha20_ctr32_ssse3_capable(size_t len) {
- return (len > 128) && CRYPTO_is_SSSE3_capable();
+ return len > 128 && CRYPTO_is_SSSE3_capable();
}
void ChaCha20_ctr32_ssse3(uint8_t *out, const uint8_t *in, size_t in_len,
const uint32_t key[8], const uint32_t counter[4]);
#endif
-#if defined(CHACHA20_ASM)
-// ChaCha20_ctr32 encrypts |in_len| bytes from |in| and writes the result to
-// |out|. If |in| and |out| alias, they must be equal.
+#if defined(CHACHA20_ASM_NOHW)
+// ChaCha20_ctr32_nohw encrypts |in_len| bytes from |in| and writes the result
+// to |out|. If |in| and |out| alias, they must be equal. |in_len| must be
+// nonzero.
//
// |counter[0]| is the initial 32-bit block counter, and the remainder is the
// 96-bit nonce. If the counter overflows, the output is undefined. The function
// will produce output, but the output may vary by machine and may not be
// self-consistent. (On some architectures, the assembly implements a mix of
// 64-bit and 32-bit counters.)
-void ChaCha20_ctr32(uint8_t *out, const uint8_t *in, size_t in_len,
- const uint32_t key[8], const uint32_t counter[4]);
-#endif
-
-#if defined(CHACHA20_ASM_NOHW)
-// ChaCha20_ctr32_nohw is like |ChaCha20_ctr32| except |in_len| must be nonzero.
void ChaCha20_ctr32_nohw(uint8_t *out, const uint8_t *in, size_t in_len,
const uint32_t key[8], const uint32_t counter[4]);
#endif