Add native support for onces and thread-local storage.

Historically, OpenSSL has used callbacks for anything thread related,
but we don't actually have that many threading libraries to worry about:
just pthreads and Windows (I hope).

That suggests that it's quite reasonable to handle threading ourselves,
and eliminate the need for users to remember to install the thread
callbacks.

The first user of this would be ERR, which currently simulates
thread-local storage using a lock around a hash table keyed by the TID.
(Although I suspect that change will need some CMake work in order that
libpthread is automatically included with libcrypto when linking tests
etc, but not on Windows and without lots of ifs.)

Change-Id: I4dd088e3794506747f875c1f3e92b9bc6700fad2
Reviewed-on: https://boringssl-review.googlesource.com/4010
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/CMakeLists.txt b/crypto/CMakeLists.txt
index c454664..9eb9452 100644
--- a/crypto/CMakeLists.txt
+++ b/crypto/CMakeLists.txt
@@ -140,6 +140,8 @@
   crypto.c
   mem.c
   thread.c
+  thread_pthread.c
+  thread_win.c
   ex_data.c
   ex_data_impl.c
   time_support.c
@@ -195,5 +197,17 @@
 
 target_link_libraries(constant_time_test crypto)
 
+add_executable(
+  thread_test
+
+  thread_test.c
+)
+
+if(MSVC)
+  target_link_libraries(thread_test crypto)
+else()
+  target_link_libraries(thread_test crypto pthread)
+endif()
+
 perlasm(cpu-x86_64-asm.${ASM_EXT} cpu-x86_64-asm.pl)
 perlasm(cpu-x86-asm.${ASM_EXT} cpu-x86-asm.pl)
diff --git a/crypto/internal.h b/crypto/internal.h
index 4336e65..ec3b3e2 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -111,6 +111,10 @@
 
 #include <openssl/ex_data.h>
 
+#if !defined(OPENSSL_WINDOWS)
+#include <pthread.h>
+#endif
+
 #if defined(__cplusplus)
 extern "C" {
 #endif
@@ -295,6 +299,62 @@
 }
 
 
+/* Thread-safe initialisation. */
+
+#if !defined(OPENSSL_WINDOWS)
+typedef pthread_once_t CRYPTO_once_t;
+#define CRYPTO_ONCE_INIT PTHREAD_ONCE_INIT
+#else
+typedef int32_t CRYPTO_once_t;
+#define CRYPTO_ONCE_INIT 0
+#endif
+
+/* CRYPTO_once calls |init| exactly once per process. This is thread-safe: if
+ * concurrent threads call |CRYPTO_once| with the same |CRYPTO_once_t| argument
+ * then they will block until |init| completes, but |init| will have only been
+ * called once.
+ *
+ * The |once| argument must be a |CRYPTO_once_t| that has been initialised with
+ * the value |CRYPTO_ONCE_INIT|. */
+void CRYPTO_once(CRYPTO_once_t *once, void (*init)(void));
+
+
+/* Thread local storage. */
+
+/* thread_local_data_t enumerates the types of thread-local data that can be
+ * stored. */
+typedef enum {
+  OPENSSL_THREAD_LOCAL_ERR = 0,
+  OPENSSL_THREAD_LOCAL_TEST,
+  NUM_OPENSSL_THREAD_LOCALS,
+} thread_local_data_t;
+
+/* thread_local_destructor_t is the type of a destructor function that will be
+ * called when a thread exits and its thread-local storage needs to be freed. */
+typedef void (*thread_local_destructor_t)(void *);
+
+/* CRYPTO_get_thread_local gets the pointer value that is stored for the
+ * current thread for the given |index|, or NULL if none has been set. */
+void *CRYPTO_get_thread_local(thread_local_data_t index);
+
+/* CRYPTO_set_thread_local sets a pointer value for the current thread at the
+ * given index. This function should only be called once per thread for a given
+ * |index|: rather than update the pointer value itself, update the data that
+ * is pointed to.
+ *
+ * The destructor function will be called when a thread exits to free this
+ * thread-local data. All calls to |CRYPTO_set_thread_local| with the same
+ * |index| should have the same |destructor| argument. The destructor may be
+ * called with a NULL argument if a thread exits without ever having set a
+ * thread-local pointer for |index|. The destructor may be called concurrently
+ * with different arguments.
+ *
+ * This function returns one on success or zero on error. If it returns zero
+ * then |destructor| has been called with |value| already. */
+int CRYPTO_set_thread_local(thread_local_data_t index, void *value,
+                            thread_local_destructor_t destructor);
+
+
 #if defined(__cplusplus)
 }  /* extern C */
 #endif
diff --git a/crypto/thread_pthread.c b/crypto/thread_pthread.c
new file mode 100644
index 0000000..1516ea1
--- /dev/null
+++ b/crypto/thread_pthread.c
@@ -0,0 +1,111 @@
+/* Copyright (c) 2015, Google Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
+
+#include "internal.h"
+
+#if !defined(OPENSSL_WINDOWS)
+
+#include <pthread.h>
+#include <string.h>
+
+#include <openssl/mem.h>
+
+
+void CRYPTO_once(CRYPTO_once_t *once, void (*init)(void)) {
+  pthread_once(once, init);
+}
+
+static pthread_mutex_t g_destructors_lock = PTHREAD_MUTEX_INITIALIZER;
+static thread_local_destructor_t g_destructors[NUM_OPENSSL_THREAD_LOCALS];
+
+static void thread_local_destructor(void *arg) {
+  if (arg == NULL) {  /* No per-thread array to clean up. */
+    return;
+  }
+
+  thread_local_destructor_t destructors[NUM_OPENSSL_THREAD_LOCALS];
+  if (pthread_mutex_lock(&g_destructors_lock) != 0) {
+    return;  /* NOTE(review): |arg| is not freed on this path. */
+  }
+  memcpy(destructors, g_destructors, sizeof(destructors));  /* Snapshot. */
+  pthread_mutex_unlock(&g_destructors_lock);
+
+  unsigned i;
+  void **pointers = arg;  /* One slot per |thread_local_data_t| index. */
+  for (i = 0; i < NUM_OPENSSL_THREAD_LOCALS; i++) {
+    if (destructors[i] != NULL) {
+      destructors[i](pointers[i]);  /* |pointers[i]| may be NULL. */
+    }
+  }
+
+  OPENSSL_free(pointers);
+}
+
+static pthread_once_t g_thread_local_init_once = PTHREAD_ONCE_INIT;
+static pthread_key_t g_thread_local_key;
+static int g_thread_local_failed = 0;
+
+static void thread_local_init(void) {
+  g_thread_local_failed =
+      pthread_key_create(&g_thread_local_key, thread_local_destructor) != 0;
+}
+
+void *CRYPTO_get_thread_local(thread_local_data_t index) {
+  CRYPTO_once(&g_thread_local_init_once, thread_local_init);
+  if (g_thread_local_failed) {
+    return NULL;
+  }
+
+  void **pointers = pthread_getspecific(g_thread_local_key);
+  if (pointers == NULL) {
+    return NULL;
+  }
+  return pointers[index];
+}
+
+int CRYPTO_set_thread_local(thread_local_data_t index, void *value,
+                            thread_local_destructor_t destructor) {
+  CRYPTO_once(&g_thread_local_init_once, thread_local_init);
+  if (g_thread_local_failed) {
+    destructor(value);
+    return 0;
+  }
+
+  void **pointers = pthread_getspecific(g_thread_local_key);
+  if (pointers == NULL) {
+    pointers = OPENSSL_malloc(sizeof(void *) * NUM_OPENSSL_THREAD_LOCALS);
+    if (pointers == NULL) {
+      destructor(value);
+      return 0;
+    }
+    memset(pointers, 0, sizeof(void *) * NUM_OPENSSL_THREAD_LOCALS);
+    if (pthread_setspecific(g_thread_local_key, pointers) != 0) {
+      OPENSSL_free(pointers);
+      destructor(value);
+      return 0;
+    }
+  }
+
+  if (pthread_mutex_lock(&g_destructors_lock) != 0) {
+    destructor(value);
+    return 0;
+  }
+  g_destructors[index] = destructor;
+  pthread_mutex_unlock(&g_destructors_lock);
+
+  pointers[index] = value;
+  return 1;
+}
+
+#endif  /* !OPENSSL_WINDOWS */
diff --git a/crypto/thread_test.c b/crypto/thread_test.c
new file mode 100644
index 0000000..04d71c5
--- /dev/null
+++ b/crypto/thread_test.c
@@ -0,0 +1,191 @@
+/* Copyright (c) 2015, Google Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
+
+#include "internal.h"
+
+#include <stdio.h>
+
+
+#if defined(OPENSSL_WINDOWS)
+
+#pragma warning(push, 3)
+#include <Windows.h>
+#pragma warning(pop)
+
+typedef HANDLE thread_t;
+
+static DWORD WINAPI thread_run(LPVOID arg) {
+  void (*thread_func)(void);
+  /* VC really doesn't like casting between data and function pointers. */
+  memcpy(&thread_func, &arg, sizeof(thread_func));
+  thread_func();
+  return 0;
+}
+
+static int run_thread(thread_t *out_thread, void (*thread_func)(void)) {
+  void *arg;
+  /* VC really doesn't like casting between data and function pointers. */
+  memcpy(&arg, &thread_func, sizeof(arg));
+
+  *out_thread = CreateThread(NULL /* security attributes */,
+                             0 /* default stack size */, thread_run, arg,
+                             0 /* run immediately */, NULL /* ignore id */);
+  return *out_thread != NULL;
+}
+
+static int wait_for_thread(thread_t thread) {
+  return WaitForSingleObject(thread, INFINITE) == 0;
+}
+
+#else
+
+#include <pthread.h>
+
+typedef pthread_t thread_t;
+
+static void *thread_run(void *arg) {
+  void (*thread_func)(void) = arg;
+  thread_func();
+  return NULL;
+}
+
+static int run_thread(thread_t *out_thread, void (*thread_func)(void)) {
+  return pthread_create(out_thread, NULL /* default attributes */, thread_run,
+                        thread_func) == 0;
+}
+
+static int wait_for_thread(thread_t thread) {
+  return pthread_join(thread, NULL) == 0;
+}
+
+#endif  /* OPENSSL_WINDOWS */
+
+static unsigned g_once_init_called = 0;
+
+static void once_init(void) {
+  g_once_init_called++;
+}
+
+static CRYPTO_once_t g_test_once = CRYPTO_ONCE_INIT;
+
+static void call_once_thread(void) {
+  CRYPTO_once(&g_test_once, once_init);
+}
+
+static int test_once(void) {
+  if (g_once_init_called != 0) {
+    fprintf(stderr, "g_once_init_called was non-zero at start.\n");
+    return 0;
+  }
+
+  thread_t thread;
+  if (!run_thread(&thread, call_once_thread) ||
+      !wait_for_thread(thread)) {
+    fprintf(stderr, "thread failed.\n");
+    return 0;
+  }
+
+  CRYPTO_once(&g_test_once, once_init);
+
+  if (g_once_init_called != 1) {
+    fprintf(stderr, "Expected init function to be called once, but found %u.\n",
+            g_once_init_called);
+    return 0;
+  }
+
+  return 1;
+}
+
+
+static int g_test_thread_ok = 0;
+static unsigned g_destructor_called_count = 0;
+
+static void thread_local_destructor(void *arg) {
+  if (arg == NULL) {
+    return;
+  }
+
+  unsigned *count = arg;
+  (*count)++;
+}
+
+static void thread_local_test_thread(void) {
+  void *ptr = CRYPTO_get_thread_local(OPENSSL_THREAD_LOCAL_TEST);
+  if (ptr != NULL) {
+    return;
+  }
+
+  if (!CRYPTO_set_thread_local(OPENSSL_THREAD_LOCAL_TEST,
+                               &g_destructor_called_count,
+                               thread_local_destructor)) {
+    return;
+  }
+
+  if (CRYPTO_get_thread_local(OPENSSL_THREAD_LOCAL_TEST) !=
+      &g_destructor_called_count) {
+    return;
+  }
+
+  g_test_thread_ok = 1;
+}
+
+static void thread_local_test2_thread(void) {}
+
+/* test_thread_local checks that a thread can set and read back thread-local
+ * data and that its destructor has run exactly once after the thread exits. */
+static int test_thread_local(void) {
+  if (CRYPTO_get_thread_local(OPENSSL_THREAD_LOCAL_TEST) != NULL) {
+    fprintf(stderr, "Thread-local data was non-NULL at start.\n");
+    return 0;
+  }
+
+  thread_t thread;
+  if (!run_thread(&thread, thread_local_test_thread) ||
+      !wait_for_thread(thread)) {
+    fprintf(stderr, "thread failed.\n");
+    return 0;
+  }
+
+  if (!g_test_thread_ok) {
+    fprintf(stderr, "Thread-local data didn't work in thread.\n");
+    return 0;
+  }
+
+  if (g_destructor_called_count != 1) {
+    fprintf(stderr, "Destructor should have been called once, but actually "
+            "called %u times.\n", g_destructor_called_count);
+    return 0;
+  }
+
+  /* thread_local_test2_thread doesn't do anything, but it tests that the
+   * thread destructor function works even if thread-local storage wasn't used
+   * for a thread. */
+  if (!run_thread(&thread, thread_local_test2_thread) ||
+      !wait_for_thread(thread)) {
+    fprintf(stderr, "thread failed.\n");
+    return 0;
+  }
+
+  return 1;
+}
+
+int main(int argc, char **argv) {
+  if (!test_once() ||
+      !test_thread_local()) {
+    return 1;
+  }
+
+  printf("PASS\n");
+  return 0;
+}
diff --git a/crypto/thread_win.c b/crypto/thread_win.c
new file mode 100644
index 0000000..ee48f34
--- /dev/null
+++ b/crypto/thread_win.c
@@ -0,0 +1,220 @@
+/* Copyright (c) 2015, Google Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
+
+#include "internal.h"
+
+#if defined(OPENSSL_WINDOWS)
+
+#pragma warning(push, 3)
+#include <windows.h>
+#pragma warning(pop)
+
+#include <assert.h>
+#include <string.h>
+
+#include <openssl/mem.h>
+
+
+void CRYPTO_once(CRYPTO_once_t *in_once, void (*init)(void)) {
+  volatile LONG *once = (LONG*) in_once;
+
+  assert(sizeof(LONG) == sizeof(CRYPTO_once_t));
+  /* Values must be aligned. */
+  assert((((uintptr_t) once) & 3) == 0);
+
+  /* This assumes that reading *once has acquire semantics. This should be true
+   * on x86 and x86-64, where we expect Windows to run. */
+#if !defined(OPENSSL_X86) && !defined(OPENSSL_X86_64)
+#error "Windows once code may not work on other platforms." \
+       "You can use InitOnceBeginInitialize on >=Vista"
+#endif
+  if (*once == 1) {
+    return;
+  }
+
+  for (;;) {
+    switch (InterlockedCompareExchange(once, 2, 0)) {
+      case 0:
+        /* The value was zero so we are the first thread to call |CRYPTO_once|
+         * on it. */
+        init();
+        /* Write one to indicate that initialisation is complete. */
+        InterlockedExchange(once, 1);
+        return;
+
+      case 1:
+        /* Another thread completed initialisation between our fast-path check
+         * and |InterlockedCompareExchange|. */
+        return;
+
+      case 2:
+        /* Another thread is running the initialisation. Switch to it then try
+         * again. */
+        SwitchToThread();
+        break;
+
+      default:
+        abort();
+    }
+  }
+}
+
+static CRITICAL_SECTION g_destructors_lock;
+static thread_local_destructor_t g_destructors[NUM_OPENSSL_THREAD_LOCALS];
+
+static CRYPTO_once_t g_thread_local_init_once = CRYPTO_ONCE_INIT;
+static DWORD g_thread_local_key;
+static int g_thread_local_failed;
+
+static void thread_local_init(void) {
+  if (!InitializeCriticalSectionAndSpinCount(&g_destructors_lock, 0x400)) {
+    g_thread_local_failed = 1;
+    return;
+  }
+  g_thread_local_key = TlsAlloc();
+  g_thread_local_failed = (g_thread_local_key == TLS_OUT_OF_INDEXES);
+}
+
+/* thread_local_destructor is the TLS callback that frees an exiting thread's
+ * values. Process exit is ignored: freeing may be unsafe during teardown. */
+static void NTAPI thread_local_destructor(PVOID module, DWORD reason,
+                                          PVOID reserved) {
+  if (DLL_THREAD_DETACH != reason) {
+    return;
+  }
+
+  CRYPTO_once(&g_thread_local_init_once, thread_local_init);
+  if (g_thread_local_failed) {
+    return;
+  }
+
+  void **pointers = (void**) TlsGetValue(g_thread_local_key);
+  if (pointers == NULL) {
+    return;
+  }
+
+  thread_local_destructor_t destructors[NUM_OPENSSL_THREAD_LOCALS];
+  EnterCriticalSection(&g_destructors_lock);
+  memcpy(destructors, g_destructors, sizeof(destructors));
+  LeaveCriticalSection(&g_destructors_lock);
+
+  unsigned i;
+  for (i = 0; i < NUM_OPENSSL_THREAD_LOCALS; i++) {
+    if (destructors[i] != NULL) {
+      destructors[i](pointers[i]);
+    }
+  }
+  OPENSSL_free(pointers);
+}
+
+/* Thread Termination Callbacks.
+ *
+ * Windows doesn't support a per-thread destructor with its TLS primitives.
+ * So, we build it manually by inserting a function to be called on each
+ * thread's exit. This magic is from http://www.codeproject.com/threads/tls.asp
+ * and it works for VC++ 7.0 and later.
+ *
+ * Force a reference to _tls_used to make the linker create the TLS directory
+ * if it's not already there. (E.g. if __declspec(thread) is not used). Force
+ * a reference to p_thread_callback_base to prevent whole program optimization
+ * from discarding the variable. */
+#ifdef _WIN64
+#pragma comment(linker, "/INCLUDE:_tls_used")
+#pragma comment(linker, "/INCLUDE:p_thread_callback_base")
+#else
+#pragma comment(linker, "/INCLUDE:__tls_used")
+#pragma comment(linker, "/INCLUDE:_p_thread_callback_base")
+#endif
+
+/* .CRT$XLA to .CRT$XLZ is an array of PIMAGE_TLS_CALLBACK pointers that are
+ * called automatically by the OS loader code (not the CRT) when the module is
+ * loaded and on thread creation. They are NOT called if the module has been
+ * loaded by a LoadLibrary() call. It must have implicitly been loaded at
+ * process startup.
+ *
+ * By implicitly loaded, I mean that it is directly referenced by the main EXE
+ * or by one of its dependent DLLs. Delay-loaded DLL doesn't count as being
+ * implicitly loaded.
+ *
+ * See VC\crt\src\tlssup.c for reference. */
+
+/* The linker must not discard p_thread_callback_base. (We force a reference
+ * to this variable with a linker /INCLUDE:symbol pragma to ensure that.) If
+ * this variable is discarded, |thread_local_destructor| will never be
+ * called. */
+#ifdef _WIN64
+
+/* .CRT section is merged with .rdata on x64 so it must be constant data. */
+#pragma const_seg(".CRT$XLC")
+/* When defining a const variable, it must have external linkage to be sure the
+ * linker doesn't discard it. */
+extern const PIMAGE_TLS_CALLBACK p_thread_callback_base;
+const PIMAGE_TLS_CALLBACK p_thread_callback_base = thread_local_destructor;
+/* Reset the default section. */
+#pragma const_seg()
+
+#else
+
+#pragma data_seg(".CRT$XLC")
+PIMAGE_TLS_CALLBACK p_thread_callback_base = thread_local_destructor;
+/* Reset the default section. */
+#pragma data_seg()
+
+#endif  /* _WIN64 */
+
+void *CRYPTO_get_thread_local(thread_local_data_t index) {
+  CRYPTO_once(&g_thread_local_init_once, thread_local_init);
+  if (g_thread_local_failed) {
+    return NULL;
+  }
+
+  void **pointers = TlsGetValue(g_thread_local_key);
+  if (pointers == NULL) {
+    return NULL;
+  }
+  return pointers[index];
+}
+
+int CRYPTO_set_thread_local(thread_local_data_t index, void *value,
+                            thread_local_destructor_t destructor) {
+  CRYPTO_once(&g_thread_local_init_once, thread_local_init);
+  if (g_thread_local_failed) {
+    destructor(value);
+    return 0;
+  }
+
+  void **pointers = TlsGetValue(g_thread_local_key);
+  if (pointers == NULL) {
+    pointers = OPENSSL_malloc(sizeof(void *) * NUM_OPENSSL_THREAD_LOCALS);
+    if (pointers == NULL) {
+      destructor(value);
+      return 0;
+    }
+    memset(pointers, 0, sizeof(void *) * NUM_OPENSSL_THREAD_LOCALS);
+    if (TlsSetValue(g_thread_local_key, pointers) == 0) {
+      OPENSSL_free(pointers);
+      destructor(value);
+      return 0;
+    }
+  }
+
+  EnterCriticalSection(&g_destructors_lock);
+  g_destructors[index] = destructor;
+  LeaveCriticalSection(&g_destructors_lock);
+
+  pointers[index] = value;
+  return 1;
+}
+
+#endif  /* OPENSSL_WINDOWS */
diff --git a/util/all_tests.go b/util/all_tests.go
index ded798e..5927257 100644
--- a/util/all_tests.go
+++ b/util/all_tests.go
@@ -81,6 +81,7 @@
 	{"crypto/modes/gcm_test"},
 	{"crypto/pkcs8/pkcs12_test"},
 	{"crypto/rsa/rsa_test"},
+	{"crypto/thread_test"},
 	{"crypto/x509/pkcs7_test"},
 	{"crypto/x509v3/tab_test"},
 	{"crypto/x509v3/v3name_test"},