Add an option to disable SSE2 intrinsics for testing.
We have some code that uses SSE2 intrinsics. Because intrinsics don't
have complicated build requirements, that code is enabled even with
OPENSSL_NO_ASM. x86_64 mandates SSE2 and people building for x86 tend to
mandate it anyway these days. This is great, but we still have generic
32-bit and 64-bit code configurations for other platforms.
32-bit generic code is covered by testing 32-bit ARM with NEON disabled.
However, 64-bit ARM always has NEON available, so we have no SIMD-less
64-bit platforms in our CI.
The immediate motivation is some bitsliced AES code I'm working on;
however, I believe this also applies to the existing HRSS code. This also
fixes the HRSS feature checks to only look at __SSE2__, not __SSE__.
__SSE__ isn't sufficient and we don't compile if GCC or Clang is told
-msse -mno-sse2.
Change-Id: Iebb23f1664a2f62e0b4333e0e99f7d5f6c7f384d
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/39204
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/CMakeLists.txt b/CMakeLists.txt
index b7f468f..75bf998 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -458,6 +458,10 @@
list(GET CMAKE_OSX_ARCHITECTURES 0 CMAKE_SYSTEM_PROCESSOR)
endif()
+if(OPENSSL_NO_SSE2_FOR_TESTING)
+ add_definitions(-DOPENSSL_NO_SSE2_FOR_TESTING)
+endif()
+
if(OPENSSL_NO_ASM)
add_definitions(-DOPENSSL_NO_ASM)
set(ARCH "generic")
diff --git a/crypto/fipsmodule/modes/gcm_nohw.c b/crypto/fipsmodule/modes/gcm_nohw.c
index 4dc3b27..f8618b8 100644
--- a/crypto/fipsmodule/modes/gcm_nohw.c
+++ b/crypto/fipsmodule/modes/gcm_nohw.c
@@ -17,7 +17,7 @@
#include "../../internal.h"
#include "internal.h"
-#if !defined(BORINGSSL_HAS_UINT128) && defined(__SSE2__)
+#if !defined(BORINGSSL_HAS_UINT128) && defined(OPENSSL_SSE2)
#include <emmintrin.h>
#endif
@@ -79,7 +79,7 @@
((uint64_t)(extra >> 64));
}
-#elif defined(__SSE2__)
+#elif defined(OPENSSL_SSE2)
static __m128i gcm_mul32_nohw(uint32_t a, uint32_t b) {
// One term every four bits means the largest term is 32/4 = 8, which does not
@@ -146,7 +146,7 @@
memcpy(out_hi, ((char*)&ret) + 8, 8);
}
-#else // !BORINGSSL_HAS_UINT128 && !__SSE2__
+#else // !BORINGSSL_HAS_UINT128 && !OPENSSL_SSE2
static uint64_t gcm_mul32_nohw(uint32_t a, uint32_t b) {
// One term every four bits means the largest term is 32/4 = 8, which does not
diff --git a/crypto/hrss/hrss.c b/crypto/hrss/hrss.c
index d81a43f..0f66e97 100644
--- a/crypto/hrss/hrss.c
+++ b/crypto/hrss/hrss.c
@@ -24,15 +24,6 @@
#include <openssl/mem.h>
#include <openssl/sha.h>
-#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
-#include <emmintrin.h>
-#endif
-
-#if (defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)) && \
- (defined(__ARM_NEON__) || defined(__ARM_NEON))
-#include <arm_neon.h>
-#endif
-
#if defined(_MSC_VER)
#define RESTRICT
#else
@@ -42,6 +33,15 @@
#include "../internal.h"
#include "internal.h"
+#if defined(OPENSSL_SSE2)
+#include <emmintrin.h>
+#endif
+
+#if (defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)) && \
+ (defined(__ARM_NEON__) || defined(__ARM_NEON))
+#include <arm_neon.h>
+#endif
+
// This is an implementation of [HRSS], but with a KEM transformation based on
// [SXY]. The primary references are:
@@ -63,22 +63,15 @@
// 128-bit vector. The following functions abstract over the differences between
// NEON and SSE2 for implementing some vector operations.
-// TODO: MSVC can likely also be made to work with vector operations.
-#if ((defined(__SSE__) && defined(OPENSSL_X86)) || defined(OPENSSL_X86_64)) && \
- (defined(__clang__) || !defined(_MSC_VER))
+// TODO: MSVC can likely also be made to work with vector operations, but ^ must
+// be replaced with _mm_xor_si128, etc.
+#if defined(OPENSSL_SSE2) && (defined(__clang__) || !defined(_MSC_VER))
#define HRSS_HAVE_VECTOR_UNIT
typedef __m128i vec_t;
// vec_capable returns one iff the current platform supports SSE2.
-static int vec_capable(void) {
-#if defined(__SSE2__)
- return 1;
-#else
- int has_sse2 = (OPENSSL_ia32cap_P[0] & (1 << 26)) != 0;
- return has_sse2;
-#endif
-}
+static int vec_capable(void) { return 1; }
// vec_add performs a pair-wise addition of four uint16s from |a| and |b|.
static inline vec_t vec_add(vec_t a, vec_t b) { return _mm_add_epi16(a, b); }
diff --git a/crypto/internal.h b/crypto/internal.h
index 1fba5b6..b75f9af 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -200,6 +200,14 @@
#define OPENSSL_FALLTHROUGH
#endif
+// For convenience in testing 64-bit generic code, we allow disabling SSE2
+// intrinsics via |OPENSSL_NO_SSE2_FOR_TESTING|. x86_64 always has SSE2
+// available, so we would otherwise need to test such code on a non-x86_64
+// platform.
+#if defined(__SSE2__) && !defined(OPENSSL_NO_SSE2_FOR_TESTING)
+#define OPENSSL_SSE2
+#endif
+
// buffers_alias returns one if |a| and |b| alias and zero otherwise.
static inline int buffers_alias(const uint8_t *a, size_t a_len,
const uint8_t *b, size_t b_len) {