Add an atomics library to crypto/internal.h

refcount.c is now a single, generic file that calls into C11-atomic-like
APIs. Behind the scenes, this selects one of C11 atomics, Windows
interlocked APIs, or unsynchronized reads/writes (in the no-threads
build).

This frees us up to use atomics elsewhere in the library. For now, this
only binds sequentially consistent atomics, but we can add other memory
orders if needed. In particular, I believe up_ref only needs relaxed
atomics. Some of the later changes, I think, only need acquire and
release, but I'm not positive.

Bug: 570
Cq-Include-Trybots: luci.boringssl.try:linux_clang_rel_tsan
Change-Id: Ifcd7357611bb7a8cd14b82c23ad080d1a2df1386
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/59848
Commit-Queue: David Benjamin <davidben@google.com>
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/CMakeLists.txt b/crypto/CMakeLists.txt
index c427b20..ef47623 100644
--- a/crypto/CMakeLists.txt
+++ b/crypto/CMakeLists.txt
@@ -202,9 +202,7 @@
   rand_extra/rand_extra.c
   rand_extra/windows.c
   rc4/rc4.c
-  refcount_c11.c
-  refcount_no_threads.c
-  refcount_win.c
+  refcount.c
   rsa_extra/rsa_asn1.c
   rsa_extra/rsa_crypt.c
   rsa_extra/rsa_print.c
diff --git a/crypto/internal.h b/crypto/internal.h
index 5c04735..00f0582 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -155,6 +155,32 @@
 #if defined(OPENSSL_THREADS) && !defined(OPENSSL_PTHREADS) && \
     defined(OPENSSL_WINDOWS)
 #define OPENSSL_WINDOWS_THREADS
+#endif
+
+// Determine the atomics implementation to use with C.
+#if !defined(__cplusplus)
+#if !defined(OPENSSL_C11_ATOMIC) && defined(OPENSSL_THREADS) &&   \
+    !defined(__STDC_NO_ATOMICS__) && defined(__STDC_VERSION__) && \
+    __STDC_VERSION__ >= 201112L
+#define OPENSSL_C11_ATOMIC
+#endif
+
+// Older MSVC does not support C11 atomics, so we fall back to the Windows
+// APIs. When both are available (e.g. clang-cl), we prefer the C11 ones. The
+// Windows APIs don't allow some operations to be implemented as efficiently.
+// This can be removed once we can rely on
+// https://devblogs.microsoft.com/cppblog/c11-atomics-in-visual-studio-2022-version-17-5-preview-2/
+#if !defined(OPENSSL_C11_ATOMIC) && defined(OPENSSL_THREADS) && \
+    defined(OPENSSL_WINDOWS)
+#define OPENSSL_WINDOWS_ATOMIC
+#endif
+#endif  // !__cplusplus
+
+#if defined(OPENSSL_C11_ATOMIC)
+#include <stdatomic.h>
+#endif
+
+#if defined(OPENSSL_WINDOWS_THREADS) || defined(OPENSSL_WINDOWS_ATOMIC)
 OPENSSL_MSVC_PRAGMA(warning(push, 3))
 #include <windows.h>
 OPENSSL_MSVC_PRAGMA(warning(pop))
@@ -539,33 +565,102 @@
 OPENSSL_EXPORT void CRYPTO_once(CRYPTO_once_t *once, void (*init)(void));
 
 
-// Reference counting.
+// Atomics.
+//
+// The following functions provide an API analogous to <stdatomic.h> from C11
+// and abstract between a few variations on atomics we need to support.
 
-// Automatically enable C11 atomics if implemented.
-#if !defined(OPENSSL_C11_ATOMIC) && defined(OPENSSL_THREADS) &&   \
-    !defined(__STDC_NO_ATOMICS__) && defined(__STDC_VERSION__) && \
-    __STDC_VERSION__ >= 201112L
-#define OPENSSL_C11_ATOMIC
-#endif
+#if defined(__cplusplus)
 
-// Older MSVC does not support C11 atomics, so we fallback to the Windows APIs.
-// This can be removed once we can rely on
-// https://devblogs.microsoft.com/cppblog/c11-atomics-in-visual-studio-2022-version-17-5-preview-2/
-#if !defined(OPENSSL_C11_ATOMIC) && defined(OPENSSL_THREADS) && \
-    defined(OPENSSL_WINDOWS)
-#define OPENSSL_WINDOWS_ATOMIC
-#endif
+// In C++, we can't easily detect whether C will use |OPENSSL_C11_ATOMIC| or
+// |OPENSSL_WINDOWS_ATOMIC|. Instead, we define a layout-compatible type without
+// the corresponding functions. When we can rely on C11 atomics in MSVC, that
+// will no longer be a concern.
+typedef uint32_t CRYPTO_atomic_u32;
+
+#elif defined(OPENSSL_C11_ATOMIC)
+
+typedef _Atomic uint32_t CRYPTO_atomic_u32;
+
+// This should be const, but the |OPENSSL_WINDOWS_ATOMIC| implementation is not
+// const due to Windows limitations. When we can rely on C11 atomics, make this
+// const-correct.
+OPENSSL_INLINE uint32_t CRYPTO_atomic_load_u32(CRYPTO_atomic_u32 *val) {
+  return atomic_load(val);
+}
+
+OPENSSL_INLINE int CRYPTO_atomic_compare_exchange_weak_u32(
+    CRYPTO_atomic_u32 *val, uint32_t *expected, uint32_t desired) {
+  return atomic_compare_exchange_weak(val, expected, desired);
+}
+
+#elif defined(OPENSSL_WINDOWS_ATOMIC)
+
+typedef LONG CRYPTO_atomic_u32;
+
+OPENSSL_INLINE uint32_t CRYPTO_atomic_load_u32(volatile CRYPTO_atomic_u32 *val) {
+  // This is not ideal because it still writes to a cacheline. MSVC is not able
+  // to optimize this to a true atomic read, and Windows does not provide an
+  // InterlockedLoad function.
+  //
+  // The Windows documentation [1] does say "Simple reads and writes to
+  // properly-aligned 32-bit variables are atomic operations", but this is not
+  // phrased in terms of the C11 and C++11 memory models, and indeed a read or
+  // write seems to produce slightly different code on MSVC than a sequentially
+  // consistent std::atomic::load in C++. Moreover, it is unclear if non-MSVC
+  // compilers on Windows provide the same guarantees. Thus we avoid relying on
+  // this and instead still use an interlocked function. This is still
+  // preferable to a global mutex, and eventually this code will be replaced by
+  // [2]. Additionally, on clang-cl, we'll use the |OPENSSL_C11_ATOMIC| path.
+  //
+  // [1] https://learn.microsoft.com/en-us/windows/win32/sync/interlocked-variable-access
+  // [2] https://devblogs.microsoft.com/cppblog/c11-atomics-in-visual-studio-2022-version-17-5-preview-2/
+  return (uint32_t)InterlockedCompareExchange(val, 0, 0);
+}
+
+OPENSSL_INLINE int CRYPTO_atomic_compare_exchange_weak_u32(
+    volatile CRYPTO_atomic_u32 *val, uint32_t *expected32, uint32_t desired) {
+  LONG expected = (LONG)*expected32;
+  LONG actual = InterlockedCompareExchange(val, (LONG)desired, expected);
+  *expected32 = (uint32_t)actual;
+  return actual == expected;
+}
+
+#elif !defined(OPENSSL_THREADS)
+
+typedef uint32_t CRYPTO_atomic_u32;
+
+OPENSSL_INLINE uint32_t CRYPTO_atomic_load_u32(CRYPTO_atomic_u32 *val) {
+  return *val;
+}
+
+OPENSSL_INLINE int CRYPTO_atomic_compare_exchange_weak_u32(
+    CRYPTO_atomic_u32 *val, uint32_t *expected, uint32_t desired) {
+  if (*val != *expected) {
+    *expected = *val;
+    return 0;
+  }
+  *val = desired;
+  return 1;
+}
+
+#else
 
 // Require some atomics implementation. Contact BoringSSL maintainers if you
 // have a platform with fails this check.
-//
-// Note this check can only be done in C. From C++, we don't know whether the
-// corresponding C mode would support C11 atomics.
-#if !defined(__cplusplus) && defined(OPENSSL_THREADS) && \
-    !defined(OPENSSL_C11_ATOMIC) && !defined(OPENSSL_WINDOWS_ATOMIC)
 #error "Thread-compatible configurations require atomics"
+
 #endif
 
+// See the comment in the |__cplusplus| section above.
+static_assert(sizeof(CRYPTO_atomic_u32) == sizeof(uint32_t),
+              "CRYPTO_atomic_u32 does not match uint32_t size");
+static_assert(alignof(CRYPTO_atomic_u32) == alignof(uint32_t),
+              "CRYPTO_atomic_u32 does not match uint32_t alignment");
+
+
+// Reference counting.
+
 // CRYPTO_REFCOUNT_MAX is the value at which the reference count saturates.
 #define CRYPTO_REFCOUNT_MAX 0xffffffff
 
diff --git a/crypto/refcount_c11.c b/crypto/refcount.c
similarity index 66%
rename from crypto/refcount_c11.c
rename to crypto/refcount.c
index a1781c6..74ebdd7 100644
--- a/crypto/refcount_c11.c
+++ b/crypto/refcount.c
@@ -14,39 +14,35 @@
 
 #include "internal.h"
 
-
-#if defined(OPENSSL_C11_ATOMIC)
-
 #include <assert.h>
 #include <stdalign.h>
-#include <stdatomic.h>
 #include <stdlib.h>
 
 
 // See comment above the typedef of CRYPTO_refcount_t about these tests.
-static_assert(alignof(CRYPTO_refcount_t) == alignof(_Atomic CRYPTO_refcount_t),
-              "_Atomic alters the needed alignment of a reference count");
-static_assert(sizeof(CRYPTO_refcount_t) == sizeof(_Atomic CRYPTO_refcount_t),
-              "_Atomic alters the size of a reference count");
+static_assert(alignof(CRYPTO_refcount_t) == alignof(CRYPTO_atomic_u32),
+              "CRYPTO_refcount_t does not match CRYPTO_atomic_u32 alignment");
+static_assert(sizeof(CRYPTO_refcount_t) == sizeof(CRYPTO_atomic_u32),
+              "CRYPTO_refcount_t does not match CRYPTO_atomic_u32 size");
 
 static_assert((CRYPTO_refcount_t)-1 == CRYPTO_REFCOUNT_MAX,
               "CRYPTO_REFCOUNT_MAX is incorrect");
 
 void CRYPTO_refcount_inc(CRYPTO_refcount_t *in_count) {
-  _Atomic CRYPTO_refcount_t *count = (_Atomic CRYPTO_refcount_t *) in_count;
-  uint32_t expected = atomic_load(count);
+  CRYPTO_atomic_u32 *count = (CRYPTO_atomic_u32 *)in_count;
+  uint32_t expected = CRYPTO_atomic_load_u32(count);
 
   while (expected != CRYPTO_REFCOUNT_MAX) {
     uint32_t new_value = expected + 1;
-    if (atomic_compare_exchange_weak(count, &expected, new_value)) {
+    if (CRYPTO_atomic_compare_exchange_weak_u32(count, &expected, new_value)) {
       break;
     }
   }
 }
 
 int CRYPTO_refcount_dec_and_test_zero(CRYPTO_refcount_t *in_count) {
-  _Atomic CRYPTO_refcount_t *count = (_Atomic CRYPTO_refcount_t *)in_count;
-  uint32_t expected = atomic_load(count);
+  CRYPTO_atomic_u32 *count = (CRYPTO_atomic_u32 *)in_count;
+  uint32_t expected = CRYPTO_atomic_load_u32(count);
 
   for (;;) {
     if (expected == 0) {
@@ -55,11 +51,10 @@
       return 0;
     } else {
       const uint32_t new_value = expected - 1;
-      if (atomic_compare_exchange_weak(count, &expected, new_value)) {
+      if (CRYPTO_atomic_compare_exchange_weak_u32(count, &expected,
+                                                  new_value)) {
         return new_value == 0;
       }
     }
   }
 }
-
-#endif  // OPENSSL_C11_ATOMIC
diff --git a/crypto/refcount_no_threads.c b/crypto/refcount_no_threads.c
deleted file mode 100644
index 096b4fa..0000000
--- a/crypto/refcount_no_threads.c
+++ /dev/null
@@ -1,42 +0,0 @@
-/* Copyright (c) 2015, Google Inc.
- *
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
- * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
-
-#include "internal.h"
-
-#include <assert.h>
-#include <stdlib.h>
-
-
-#if !defined(OPENSSL_THREADS)
-
-static_assert((CRYPTO_refcount_t)-1 == CRYPTO_REFCOUNT_MAX,
-              "CRYPTO_REFCOUNT_MAX is incorrect");
-
-void CRYPTO_refcount_inc(CRYPTO_refcount_t *count) {
-  if (*count < CRYPTO_REFCOUNT_MAX) {
-    (*count)++;
-  }
-}
-
-int CRYPTO_refcount_dec_and_test_zero(CRYPTO_refcount_t *count) {
-  if (*count == 0) {
-    abort();
-  }
-  if (*count < CRYPTO_REFCOUNT_MAX) {
-    (*count)--;
-  }
-  return *count == 0;
-}
-
-#endif  // !OPENSSL_THREADS
diff --git a/crypto/refcount_win.c b/crypto/refcount_win.c
deleted file mode 100644
index 7a2740b..0000000
--- a/crypto/refcount_win.c
+++ /dev/null
@@ -1,89 +0,0 @@
-/* Copyright (c) 2023, Google Inc.
- *
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
- * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
-
-#include "internal.h"
-
-#if defined(OPENSSL_WINDOWS_ATOMIC)
-
-#include <windows.h>
-
-
-// See comment above the typedef of CRYPTO_refcount_t about these tests.
-static_assert(alignof(CRYPTO_refcount_t) == alignof(LONG),
-              "CRYPTO_refcount_t does not match LONG alignment");
-static_assert(sizeof(CRYPTO_refcount_t) == sizeof(LONG),
-              "CRYPTO_refcount_t does not match LONG size");
-
-static_assert((CRYPTO_refcount_t)-1 == CRYPTO_REFCOUNT_MAX,
-              "CRYPTO_REFCOUNT_MAX is incorrect");
-
-static uint32_t atomic_load_u32(volatile LONG *ptr) {
-  // This is not ideal because it still writes to a cacheline. MSVC is not able
-  // to optimize this to a true atomic read, and Windows does not provide an
-  // InterlockedLoad function.
-  //
-  // The Windows documentation [1] does say "Simple reads and writes to
-  // properly-aligned 32-bit variables are atomic operations", but this is not
-  // phrased in terms of the C11 and C++11 memory models, and indeed a read or
-  // write seems to produce slightly different code on MSVC than a sequentially
-  // consistent std::atomic::load in C++. Moreover, it is unclear if non-MSVC
-  // compilers on Windows provide the same guarantees. Thus we avoid relying on
-  // this and instead still use an interlocked function. This is still
-  // preferable a global mutex, and eventually this code will be replaced by
-  // [2]. Additionally, on clang-cl, we'll use the |OPENSSL_C11_ATOMIC| path.
-  //
-  // [1] https://learn.microsoft.com/en-us/windows/win32/sync/interlocked-variable-access
-  // [2] https://devblogs.microsoft.com/cppblog/c11-atomics-in-visual-studio-2022-version-17-5-preview-2/
-  return (uint32_t)InterlockedCompareExchange(ptr, 0, 0);
-}
-
-static int atomic_compare_exchange_u32(volatile LONG *ptr, uint32_t *expected32,
-                                       uint32_t desired) {
-  LONG expected = (LONG)*expected32;
-  LONG actual = InterlockedCompareExchange(ptr, (LONG)desired, expected);
-  *expected32 = (uint32_t)actual;
-  return actual == expected;
-}
-
-void CRYPTO_refcount_inc(CRYPTO_refcount_t *in_count) {
-  volatile LONG *count = (volatile LONG *)in_count;
-  uint32_t expected = atomic_load_u32(count);
-
-  while (expected != CRYPTO_REFCOUNT_MAX) {
-    const uint32_t new_value = expected + 1;
-    if (atomic_compare_exchange_u32(count, &expected, new_value)) {
-      break;
-    }
-  }
-}
-
-int CRYPTO_refcount_dec_and_test_zero(CRYPTO_refcount_t *in_count) {
-  volatile LONG *count = (volatile LONG *)in_count;
-  uint32_t expected = atomic_load_u32(count);
-
-  for (;;) {
-    if (expected == 0) {
-      abort();
-    } else if (expected == CRYPTO_REFCOUNT_MAX) {
-      return 0;
-    } else {
-      const uint32_t new_value = expected - 1;
-      if (atomic_compare_exchange_u32(count, &expected, new_value)) {
-        return new_value == 0;
-      }
-    }
-  }
-}
-
-#endif  // OPENSSL_WINDOWS_ATOMIC
diff --git a/include/openssl/thread.h b/include/openssl/thread.h
index afa9f08..695182b 100644
--- a/include/openssl/thread.h
+++ b/include/openssl/thread.h
@@ -100,7 +100,7 @@
 // _Atomic qualifier. However, this header is included by C++ programs as well
 // as C code that might not set -std=c11. So, in practice, it's not possible to
 // do that. Instead we statically assert that the size and native alignment of
-// a plain uint32_t and an _Atomic uint32_t are equal in refcount_c11.c.
+// a plain uint32_t and an _Atomic uint32_t are equal in refcount.c.
 typedef uint32_t CRYPTO_refcount_t;