Enable __asm__ and uint128_t code in clang-cl.
It actually works fine. I just forgot one of the typedefs last time.
This gives a roughly 2x improvement on P-256 in clang-cl +
OPENSSL_SMALL, the configuration used by Chrome.
Before:
Did 1302 ECDH P-256 operations in 1015000us (1282.8 ops/sec)
Did 4250 ECDSA P-256 signing operations in 1047000us (4059.2 ops/sec)
Did 1750 ECDSA P-256 verify operations in 1094000us (1599.6 ops/sec)
After:
Did 3250 ECDH P-256 operations in 1078000us (3014.8 ops/sec)
Did 8250 ECDSA P-256 signing operations in 1016000us (8120.1 ops/sec)
Did 3250 ECDSA P-256 verify operations in 1063000us (3057.4 ops/sec)
(These were taken on a VM, so the measurements are extremely noisy, but
this sort of improvement is visible regardless.)
Alas, we do need a little extra fiddling because uint128_t division and
modulus do not work in clang-cl (crbug.com/787617).
Bug: chromium:787617
Update-Note: This removes the MSan uint128_t workaround, which does not
appear to be necessary anymore.
Change-Id: I8361314608521e5bdaf0e7eeae7a02c33f55c69f
Reviewed-on: https://boringssl-review.googlesource.com/23984
Reviewed-by: Adam Langley <agl@google.com>
Commit-Queue: Adam Langley <agl@google.com>
CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
diff --git a/crypto/fipsmodule/bn/asm/x86_64-gcc.c b/crypto/fipsmodule/bn/asm/x86_64-gcc.c
index 49351c1..30fff21 100644
--- a/crypto/fipsmodule/bn/asm/x86_64-gcc.c
+++ b/crypto/fipsmodule/bn/asm/x86_64-gcc.c
@@ -52,8 +52,9 @@
#include <openssl/bn.h>
-// TODO(davidben): Get this file working on Windows x64.
-#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__GNUC__)
+// TODO(davidben): Get this file working on MSVC x64.
+#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && \
+ (defined(__GNUC__) || defined(__clang__))
#include "../internal.h"
@@ -537,4 +538,4 @@
#undef mul_add_c2
#undef sqr_add_c2
-#endif // !NO_ASM && X86_64 && __GNUC__
+#endif // !NO_ASM && X86_64 && (__GNUC__ || __clang__)
diff --git a/crypto/fipsmodule/bn/div.c b/crypto/fipsmodule/bn/div.c
index c92eab3..7f261f1 100644
--- a/crypto/fipsmodule/bn/div.c
+++ b/crypto/fipsmodule/bn/div.c
@@ -155,18 +155,18 @@
//
// These issues aren't specific to x86 and x86_64, so it might be worthwhile
// to add more assembly language implementations.
-#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__GNUC__)
- __asm__ volatile (
- "divl %4"
- : "=a"(*quotient_out), "=d"(*rem_out)
- : "a"(n1), "d"(n0), "rm"(d0)
- : "cc" );
-#elif !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__GNUC__)
- __asm__ volatile (
- "divq %4"
- : "=a"(*quotient_out), "=d"(*rem_out)
- : "a"(n1), "d"(n0), "rm"(d0)
- : "cc" );
+#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && \
+ (defined(__GNUC__) || defined(__clang__))
+ __asm__ volatile("divl %4"
+ : "=a"(*quotient_out), "=d"(*rem_out)
+ : "a"(n1), "d"(n0), "rm"(d0)
+ : "cc");
+#elif !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && \
+ (defined(__GNUC__) || defined(__clang__))
+ __asm__ volatile("divq %4"
+ : "=a"(*quotient_out), "=d"(*rem_out)
+ : "a"(n1), "d"(n0), "rm"(d0)
+ : "cc");
#else
#if defined(BN_ULLONG)
BN_ULLONG n = (((BN_ULLONG)n0) << BN_BITS2) | n1;
@@ -617,7 +617,7 @@
}
BN_ULONG BN_mod_word(const BIGNUM *a, BN_ULONG w) {
-#ifndef BN_ULLONG
+#ifndef BN_CAN_DIVIDE_ULLONG
BN_ULONG ret = 0;
#else
BN_ULLONG ret = 0;
@@ -628,9 +628,9 @@
return (BN_ULONG) -1;
}
-#ifndef BN_ULLONG
- // If |w| is too long and we don't have |BN_ULLONG| then we need to fall back
- // to using |BN_div_word|.
+#ifndef BN_CAN_DIVIDE_ULLONG
+ // If |w| is too long and we don't have |BN_ULLONG| division then we need to
+ // fall back to using |BN_div_word|.
if (w > ((BN_ULONG)1 << BN_BITS4)) {
BIGNUM *tmp = BN_dup(a);
if (tmp == NULL) {
@@ -643,7 +643,7 @@
#endif
for (i = a->top - 1; i >= 0; i--) {
-#ifndef BN_ULLONG
+#ifndef BN_CAN_DIVIDE_ULLONG
ret = ((ret << BN_BITS4) | ((a->d[i] >> BN_BITS4) & BN_MASK2l)) % w;
ret = ((ret << BN_BITS4) | (a->d[i] & BN_MASK2l)) % w;
#else
diff --git a/crypto/fipsmodule/bn/generic.c b/crypto/fipsmodule/bn/generic.c
index a39a033..ee80a3c 100644
--- a/crypto/fipsmodule/bn/generic.c
+++ b/crypto/fipsmodule/bn/generic.c
@@ -64,7 +64,8 @@
// This file has two other implementations: x86 assembly language in
// asm/bn-586.pl and x86_64 inline assembly in asm/x86_64-gcc.c.
#if defined(OPENSSL_NO_ASM) || \
- !(defined(OPENSSL_X86) || (defined(OPENSSL_X86_64) && defined(__GNUC__)))
+ !(defined(OPENSSL_X86) || \
+ (defined(OPENSSL_X86_64) && (defined(__GNUC__) || defined(__clang__))))
#ifdef BN_ULLONG
#define mul_add(r, a, w, c) \
diff --git a/crypto/fipsmodule/bn/internal.h b/crypto/fipsmodule/bn/internal.h
index 75efbfa..706e544 100644
--- a/crypto/fipsmodule/bn/internal.h
+++ b/crypto/fipsmodule/bn/internal.h
@@ -140,9 +140,12 @@
#if defined(OPENSSL_64_BIT)
-#if !defined(_MSC_VER)
+#if defined(BORINGSSL_HAS_UINT128)
// MSVC doesn't support two-word integers on 64-bit.
#define BN_ULLONG uint128_t
+#if defined(BORINGSSL_CAN_DIVIDE_UINT128)
+#define BN_CAN_DIVIDE_ULLONG
+#endif
#endif
#define BN_BITS2 64
@@ -160,6 +163,7 @@
#elif defined(OPENSSL_32_BIT)
#define BN_ULLONG uint64_t
+#define BN_CAN_DIVIDE_ULLONG
#define BN_BITS2 32
#define BN_BYTES 4
#define BN_BITS4 16
diff --git a/crypto/fipsmodule/ec/ec.c b/crypto/fipsmodule/ec/ec.c
index ed54554..47a90ce 100644
--- a/crypto/fipsmodule/ec/ec.c
+++ b/crypto/fipsmodule/ec/ec.c
@@ -246,18 +246,11 @@
out->curves[2].param_len = 32;
out->curves[2].params = kP256Params;
out->curves[2].method =
-// MSan appears to have a bug that causes code to be miscompiled in opt mode.
-// While that is being looked at, don't run the uint128_t code under MSan.
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && \
- !defined(OPENSSL_SMALL) && !defined(MEMORY_SANITIZER)
+ !defined(OPENSSL_SMALL)
EC_GFp_nistz256_method();
#else
-#if defined(OPENSSL_32_BIT) || \
- (defined(OPENSSL_64_BIT) && !defined(MEMORY_SANITIZER))
EC_GFp_nistp256_method();
-#else
- EC_GFp_mont_method();
-#endif
#endif
// 1.3.132.0.33
@@ -269,8 +262,7 @@
out->curves[3].param_len = 28;
out->curves[3].params = kP224Params;
out->curves[3].method =
-#if defined(OPENSSL_64_BIT) && !defined(OPENSSL_WINDOWS) && \
- !defined(MEMORY_SANITIZER) && !defined(OPENSSL_SMALL)
+#if defined(BORINGSSL_HAS_UINT128) && !defined(OPENSSL_SMALL)
EC_GFp_nistp224_method();
#else
EC_GFp_mont_method();
diff --git a/crypto/fipsmodule/ec/p224-64.c b/crypto/fipsmodule/ec/p224-64.c
index d0285d6..0097209 100644
--- a/crypto/fipsmodule/ec/p224-64.c
+++ b/crypto/fipsmodule/ec/p224-64.c
@@ -19,9 +19,6 @@
#include <openssl/base.h>
-#if defined(OPENSSL_64_BIT) && !defined(OPENSSL_WINDOWS) && \
- !defined(OPENSSL_SMALL)
-
#include <openssl/bn.h>
#include <openssl/ec.h>
#include <openssl/err.h>
@@ -34,6 +31,8 @@
#include "../../internal.h"
+#if defined(BORINGSSL_HAS_UINT128) && !defined(OPENSSL_SMALL)
+
// Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
// using 64-bit coefficients called 'limbs', and sometimes (for multiplication
// results) as b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 +
@@ -1129,4 +1128,4 @@
out->field_decode = NULL;
};
-#endif // 64_BIT && !WINDOWS && !SMALL
+#endif // BORINGSSL_HAS_UINT128 && !SMALL
diff --git a/crypto/internal.h b/crypto/internal.h
index 76d39b7..5706414 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -151,9 +151,16 @@
#endif
-#if !defined(_MSC_VER) && defined(OPENSSL_64_BIT)
+#if (!defined(_MSC_VER) || defined(__clang__)) && defined(OPENSSL_64_BIT)
+#define BORINGSSL_HAS_UINT128
typedef __int128_t int128_t;
typedef __uint128_t uint128_t;
+
+// clang-cl supports __uint128_t but modulus and division don't work.
+// https://crbug.com/787617.
+#if !defined(_MSC_VER) || !defined(__clang__)
+#define BORINGSSL_CAN_DIVIDE_UINT128
+#endif
#endif
#define OPENSSL_ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0]))
diff --git a/third_party/fiat/p256.c b/third_party/fiat/p256.c
index 19a8284..25ef383 100644
--- a/third_party/fiat/p256.c
+++ b/third_party/fiat/p256.c
@@ -29,11 +29,6 @@
#include <openssl/base.h>
-// MSVC does not implement uint128_t, and crashes with intrinsics
-#if defined(OPENSSL_64_BIT) && !defined(OPENSSL_WINDOWS)
-#define BORINGSSL_NISTP256_64BIT 1
-#endif
-
#include <openssl/bn.h>
#include <openssl/ec.h>
#include <openssl/err.h>
@@ -42,10 +37,15 @@
#include <string.h>
#include "../../crypto/fipsmodule/delocate.h"
-#include "../../crypto/internal.h"
#include "../../crypto/fipsmodule/ec/internal.h"
+#include "../../crypto/internal.h"
+// MSVC does not implement uint128_t, and crashes with intrinsics
+#if defined(BORINGSSL_HAS_UINT128)
+#define BORINGSSL_NISTP256_64BIT 1
+#endif
+
// "intrinsics"
#if defined(BORINGSSL_NISTP256_64BIT)