diff --git a/crypto/CMakeLists.txt b/crypto/CMakeLists.txt
index c454664..9eb9452 100644
--- a/crypto/CMakeLists.txt
+++ b/crypto/CMakeLists.txt
@@ -140,6 +140,8 @@
   crypto.c
   mem.c
   thread.c
+  thread_pthread.c
+  thread_win.c
   ex_data.c
   ex_data_impl.c
   time_support.c
@@ -195,5 +197,17 @@
 
 target_link_libraries(constant_time_test crypto)
 
+add_executable(
+  thread_test
+
+  thread_test.c
+)
+
+# Every platform links crypto; only non-MSVC builds also link pthread.
+target_link_libraries(thread_test crypto)
+if(NOT MSVC)
+  target_link_libraries(thread_test pthread)
+endif()
+
 perlasm(cpu-x86_64-asm.${ASM_EXT} cpu-x86_64-asm.pl)
 perlasm(cpu-x86-asm.${ASM_EXT} cpu-x86-asm.pl)
diff --git a/crypto/internal.h b/crypto/internal.h
index 4336e65..ec3b3e2 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -111,6 +111,10 @@
 
 #include <openssl/ex_data.h>
 
+#if !defined(OPENSSL_WINDOWS)
+#include <pthread.h>
+#endif
+
 #if defined(__cplusplus)
 extern "C" {
 #endif
@@ -295,6 +299,62 @@
 }
 
 
+/* Thread-safe initialisation. |CRYPTO_once_t| is the once-flag type: a 32-bit
+ * integer on Windows and a |pthread_once_t| everywhere else. */
+#if defined(OPENSSL_WINDOWS)
+typedef int32_t CRYPTO_once_t;
+#define CRYPTO_ONCE_INIT 0
+#else
+typedef pthread_once_t CRYPTO_once_t;
+#define CRYPTO_ONCE_INIT PTHREAD_ONCE_INIT
+#endif  /* OPENSSL_WINDOWS */
+
+/* CRYPTO_once runs |init| exactly once for a given |once| object, no matter
+ * how many threads call it. If several threads call |CRYPTO_once| concurrently
+ * with the same |CRYPTO_once_t| then all of them block until |init| has
+ * completed, and |init| will have been called only once.
+ *
+ * The |once| argument must be a |CRYPTO_once_t| that has been initialised with
+ * the value |CRYPTO_ONCE_INIT|. */
+void CRYPTO_once(CRYPTO_once_t *once, void (*init)(void));
+
+
+/* Thread local storage. */
+
+/* thread_local_data_t enumerates the slots of thread-local data that can be
+ * stored. |NUM_OPENSSL_THREAD_LOCALS| is the slot count, not a slot itself. */
+typedef enum {
+  OPENSSL_THREAD_LOCAL_ERR = 0,
+  OPENSSL_THREAD_LOCAL_TEST,
+  NUM_OPENSSL_THREAD_LOCALS,
+} thread_local_data_t;
+
+/* thread_local_destructor_t is the type of a destructor function. It is passed
+ * the stored pointer when a thread exits and its storage needs to be freed. */
+typedef void (*thread_local_destructor_t)(void *);
+
+/* CRYPTO_get_thread_local gets the pointer value that is stored for the
+ * current thread for the given |index|, or NULL if none has been set. */
+void *CRYPTO_get_thread_local(thread_local_data_t index);
+
+/* CRYPTO_set_thread_local sets a pointer value for the current thread at the
+ * given index. This function should only be called once per thread for a given
+ * |index|: rather than update the pointer value itself, update the data that
+ * is pointed to.
+ *
+ * The destructor function will be called when a thread exits to free this
+ * thread-local data. All calls to |CRYPTO_set_thread_local| with the same
+ * |index| should have the same |destructor| argument. The destructor may be
+ * called with a NULL argument if a thread exits without ever having set a
+ * thread-local pointer for |index|. The destructor may be called concurrently
+ * with different arguments.
+ *
+ * This function returns one on success or zero on error. If it returns zero
+ * then |destructor| has been called with |value| already. */
+int CRYPTO_set_thread_local(thread_local_data_t index, void *value,
+                            thread_local_destructor_t destructor);
+
+
 #if defined(__cplusplus)
 }  /* extern C */
 #endif
diff --git a/crypto/thread_pthread.c b/crypto/thread_pthread.c
new file mode 100644
index 0000000..1516ea1
--- /dev/null
+++ b/crypto/thread_pthread.c
@@ -0,0 +1,111 @@
+/* Copyright (c) 2015, Google Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
+
+#include "internal.h"
+
+#if !defined(OPENSSL_WINDOWS)
+
+#include <pthread.h>
+#include <string.h>
+
+#include <openssl/mem.h>
+
+
+void CRYPTO_once(CRYPTO_once_t *once, void (*init)(void)) {
+  (void)pthread_once(once, init); /* The result is not checked. */
+}
+
+/* g_destructors has one destructor per slot; g_destructors_lock guards it. */
+static pthread_mutex_t g_destructors_lock = PTHREAD_MUTEX_INITIALIZER;
+static thread_local_destructor_t g_destructors[NUM_OPENSSL_THREAD_LOCALS];
+
+/* thread_local_destructor is the pthread key destructor: |arg| is the exiting
+ * thread's slot array. Each registered destructor is run on its slot, then the
+ * array itself is freed. */
+static void thread_local_destructor(void *arg) {
+  void **slots = arg;
+  if (slots == NULL || pthread_mutex_lock(&g_destructors_lock) != 0) {
+    return;
+  }
+  /* Snapshot the table so that no destructor runs with the lock held. */
+  thread_local_destructor_t snapshot[NUM_OPENSSL_THREAD_LOCALS];
+  memcpy(snapshot, g_destructors, sizeof(snapshot));
+  pthread_mutex_unlock(&g_destructors_lock);
+
+  unsigned slot = 0;
+  for (; slot < NUM_OPENSSL_THREAD_LOCALS; slot++) {
+    if (snapshot[slot] != NULL) {
+      snapshot[slot](slots[slot]);
+    }
+  }
+  OPENSSL_free(slots);
+}
+
+static pthread_once_t g_thread_local_init_once = PTHREAD_ONCE_INIT;
+static pthread_key_t g_thread_local_key;
+static int g_thread_local_failed = 0; /* Set if the key couldn't be created. */
+
+static void thread_local_init(void) { /* Run once; creates the shared key. */
+  int rc = pthread_key_create(&g_thread_local_key, thread_local_destructor);
+  g_thread_local_failed = (rc != 0);
+}
+
+void *CRYPTO_get_thread_local(thread_local_data_t index) {
+  /* Make sure the key exists before it is queried. */
+  CRYPTO_once(&g_thread_local_init_once, thread_local_init);
+  if (g_thread_local_failed) {
+    return NULL;
+  }
+
+  /* A thread that has never stored anything has no slot array yet, in which
+   * case there is nothing to look up. */
+  void **const slots = pthread_getspecific(g_thread_local_key);
+  return slots != NULL ? slots[index] : NULL;
+}
+
+int CRYPTO_set_thread_local(thread_local_data_t index, void *value,
+                            thread_local_destructor_t destructor) {
+  void **slots = NULL;
+  CRYPTO_once(&g_thread_local_init_once, thread_local_init);
+  if (g_thread_local_failed) {
+    goto err;
+  }
+  slots = pthread_getspecific(g_thread_local_key);
+  if (slots == NULL) {
+    /* First store on this thread: allocate and attach a zeroed slot array. */
+    slots = OPENSSL_malloc(NUM_OPENSSL_THREAD_LOCALS * sizeof(void *));
+    if (slots == NULL) {
+      goto err;
+    }
+    memset(slots, 0, NUM_OPENSSL_THREAD_LOCALS * sizeof(void *));
+    if (pthread_setspecific(g_thread_local_key, slots) != 0) {
+      OPENSSL_free(slots);
+      goto err;
+    }
+  }
+
+  if (pthread_mutex_lock(&g_destructors_lock) != 0) {
+    goto err;
+  }
+  g_destructors[index] = destructor;
+  pthread_mutex_unlock(&g_destructors_lock);
+  slots[index] = value;
+  return 1;
+
+err: /* Every failure path hands |value| to |destructor| and reports zero. */
+  destructor(value);
+  return 0;
+}
+
+#endif  /* !OPENSSL_WINDOWS */
diff --git a/crypto/thread_test.c b/crypto/thread_test.c
new file mode 100644
index 0000000..04d71c5
--- /dev/null
+++ b/crypto/thread_test.c
@@ -0,0 +1,191 @@
+/* Copyright (c) 2015, Google Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
+
+#include "internal.h"
+
+#include <stdio.h>
+
+
+#if defined(OPENSSL_WINDOWS)
+
+#pragma warning(push, 3)
+#include <Windows.h>
+#pragma warning(pop)
+
+typedef HANDLE thread_t;
+
+static DWORD WINAPI thread_run(LPVOID arg) {
+  /* Unpack the function pointer bytewise: VC dislikes data<->function casts. */
+  void (*entry)(void);
+  memcpy(&entry, &arg, sizeof(entry));
+  entry();
+  return 0;
+}
+
+static int run_thread(thread_t *out_thread, void (*thread_func)(void)) {
+  /* Pack the function pointer bytewise: VC dislikes data<->function casts. */
+  void *packed;
+  memcpy(&packed, &thread_func, sizeof(packed));
+
+  /* Default security attributes and stack size; start at once; ignore id. */
+  const HANDLE handle = CreateThread(NULL, 0, thread_run, packed, 0, NULL);
+  *out_thread = handle;
+  return handle != NULL;
+}
+
+static int wait_for_thread(thread_t thread) {
+  return WAIT_OBJECT_0 == WaitForSingleObject(thread, INFINITE);
+}
+
+#else
+
+#include <pthread.h>
+
+typedef pthread_t thread_t;
+
+static void *thread_run(void *arg) {
+  /* |arg| is the test function that |run_thread| passed to pthread_create. */
+  ((void (*)(void))arg)();
+  return NULL;
+}
+
+static int run_thread(thread_t *out_thread, void (*thread_func)(void)) {
+  const int rc = pthread_create(out_thread, NULL, thread_run, thread_func);
+  return rc == 0; /* One on success; the thread has default attributes. */
+}
+
+static int wait_for_thread(thread_t thread) {
+  return !pthread_join(thread, NULL); /* One if the join succeeded. */
+}
+
+#endif  /* OPENSSL_WINDOWS */
+
+/* g_once_init_called counts the calls to |once_init|; |test_once| expects it
+ * to end up as exactly one. */
+static unsigned g_once_init_called = 0;
+
+static void once_init(void) { g_once_init_called += 1; }
+
+/* g_test_once is shared by the helper thread and the main thread. */
+static CRYPTO_once_t g_test_once = CRYPTO_ONCE_INIT;
+
+/* call_once_thread is the body of the helper thread in |test_once|. */
+static void call_once_thread(void) { CRYPTO_once(&g_test_once, once_init); }
+
+static int test_once(void) {
+  thread_t helper;
+  if (g_once_init_called != 0) {
+    fprintf(stderr, "g_once_init_called was non-zero at start.\n");
+    return 0;
+  }
+
+  /* The first use of |g_test_once| happens on a helper thread... */
+  if (!(run_thread(&helper, call_once_thread) && wait_for_thread(helper))) {
+    fprintf(stderr, "thread failed.\n");
+    return 0;
+  }
+
+  /* ...so this second use, on the main thread, must not run |once_init|. */
+  CRYPTO_once(&g_test_once, once_init);
+  if (g_once_init_called == 1) {
+    return 1;
+  }
+
+  fprintf(stderr, "Expected init function to be called once, but found %u.\n",
+          g_once_init_called);
+  return 0;
+}
+
+
+/* Test-thread results: a success flag, and the counter the destructor bumps. */
+static int g_test_thread_ok = 0;
+static unsigned g_destructor_called_count = 0;
+
+/* thread_local_destructor increments the counter that |arg| points at. */
+static void thread_local_destructor(void *arg) {
+  unsigned *const counter = arg;
+  if (counter != NULL) {
+    *counter += 1;
+  }
+}
+
+static void thread_local_test_thread(void) {
+  void *const expected = &g_destructor_called_count;
+
+  /* A fresh thread must start with an empty slot. */
+  if (CRYPTO_get_thread_local(OPENSSL_THREAD_LOCAL_TEST) != NULL) {
+    return;
+  }
+
+  /* Store the counter's address; the destructor bumps it at thread exit. */
+  if (!CRYPTO_set_thread_local(OPENSSL_THREAD_LOCAL_TEST, expected,
+                               thread_local_destructor)) {
+    return;
+  }
+
+  /* The value read back must be the one that was just stored. */
+  if (CRYPTO_get_thread_local(OPENSSL_THREAD_LOCAL_TEST) == expected) {
+    g_test_thread_ok = 1;
+  }
+}
+
+static void thread_local_test2_thread(void) {} /* Deliberate no-op. */
+
+static int test_thread_local(void) {
+  /* Nothing has been stored on this thread, so a non-NULL value is a failure. */
+  void *ptr = CRYPTO_get_thread_local(OPENSSL_THREAD_LOCAL_TEST);
+  if (ptr != NULL) {
+    fprintf(stderr, "Thread-local data was non-NULL at start.\n");
+    return 0;
+  }
+
+  thread_t thread;
+  if (!run_thread(&thread, thread_local_test_thread) ||
+      !wait_for_thread(thread)) {
+    fprintf(stderr, "thread failed.\n");
+    return 0;
+  }
+
+  if (!g_test_thread_ok) {
+    fprintf(stderr, "Thread-local data didn't work in thread.\n");
+    return 0;
+  }
+
+  if (g_destructor_called_count != 1) {
+    fprintf(stderr, "Destructor should have been called once, but actually "
+            "called %u times.\n", g_destructor_called_count);
+    return 0;
+  }
+
+  /* thread_local_test2_thread doesn't do anything, but it tests that the
+   * thread destructor function works even if thread-local storage wasn't used
+   * for a thread. */
+  if (!run_thread(&thread, thread_local_test2_thread) ||
+      !wait_for_thread(thread)) {
+    fprintf(stderr, "thread failed.\n");
+    return 0;
+  }
+
+  return 1;
+}
+
+int main(int argc, char **argv) {
+  /* Tests run in order; the first failure stops the run. */
+  const int all_passed = test_once() && test_thread_local();
+  if (all_passed) {
+    printf("PASS\n");
+    return 0;
+  }
+  return 1;
+}
diff --git a/crypto/thread_win.c b/crypto/thread_win.c
new file mode 100644
index 0000000..ee48f34
--- /dev/null
+++ b/crypto/thread_win.c
@@ -0,0 +1,220 @@
+/* Copyright (c) 2015, Google Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
+
+#include "internal.h"
+
+#if defined(OPENSSL_WINDOWS)
+
+#pragma warning(push, 3)
+#include <windows.h>
+#pragma warning(pop)
+
+#include <assert.h>
+#include <string.h>
+
+#include <openssl/mem.h>
+
+
+void CRYPTO_once(CRYPTO_once_t *in_once, void (*init)(void)) {
+  volatile LONG *once = (LONG*) in_once;
+  /* State of |*once|: 0 = not started, 1 = done, 2 = |init| in progress. */
+  assert(sizeof(LONG) == sizeof(CRYPTO_once_t));
+  /* The interlocked calls below need a 4-byte-aligned value. */
+  assert((((uintptr_t) once) & 3) == 0);
+
+  /* The fast-path read below assumes that reading *once has acquire semantics.
+   * This should be true on x86 and x86-64, where we expect Windows to run. */
+#if !defined(OPENSSL_X86) && !defined(OPENSSL_X86_64)
+#error "Windows once code may not work on other platforms." \
+       "You can use InitOnceBeginInitialize on >=Vista"
+#endif
+  if (*once == 1) {
+    return;
+  }
+
+  for (;;) {
+    switch (InterlockedCompareExchange(once, 2, 0)) {
+      case 0:
+        /* The value was zero and is now two: we are the first thread to call
+         * |CRYPTO_once| on it, so it falls to us to run |init|. */
+        init();
+        /* Write one to indicate that initialisation is complete. */
+        InterlockedExchange(once, 1);
+        return;
+
+      case 1:
+        /* Another thread completed initialisation between our fast-path check
+         * and |InterlockedCompareExchange|. */
+        return;
+
+      case 2:
+        /* Another thread is still running |init|. Yield the processor, then
+         * loop round and check again. */
+        SwitchToThread();
+        break;
+
+      default:
+        abort();
+    }
+  }
+}
+
+static CRITICAL_SECTION g_destructors_lock; /* Guards |g_destructors|. */
+static thread_local_destructor_t g_destructors[NUM_OPENSSL_THREAD_LOCALS];
+
+static CRYPTO_once_t g_thread_local_init_once = CRYPTO_ONCE_INIT;
+static DWORD g_thread_local_key;
+static int g_thread_local_failed;
+/* thread_local_init sets up the lock and the TLS index, noting any failure. */
+static void thread_local_init(void) {
+  if (InitializeCriticalSectionAndSpinCount(&g_destructors_lock, 0x400)) {
+    g_thread_local_key = TlsAlloc();
+    g_thread_local_failed = (TLS_OUT_OF_INDEXES == g_thread_local_key);
+  } else {
+    g_thread_local_failed = 1;
+  }
+}
+
+static void NTAPI thread_local_destructor(PVOID module,
+                                          DWORD reason, PVOID reserved) {
+  /* Only thread and process detach notifications are of interest. */
+  if (!(reason == DLL_THREAD_DETACH || reason == DLL_PROCESS_DETACH)) {
+    return;
+  }
+
+  CRYPTO_once(&g_thread_local_init_once, thread_local_init);
+  if (g_thread_local_failed) {
+    return;
+  }
+
+  void **const slots = (void **)TlsGetValue(g_thread_local_key);
+  if (slots == NULL) {
+    return;
+  }
+
+  /* Snapshot the table so that no destructor runs with the lock held. */
+  thread_local_destructor_t snapshot[NUM_OPENSSL_THREAD_LOCALS];
+  EnterCriticalSection(&g_destructors_lock);
+  memcpy(snapshot, g_destructors, sizeof(snapshot));
+  LeaveCriticalSection(&g_destructors_lock);
+
+  unsigned slot = 0;
+  for (; slot < NUM_OPENSSL_THREAD_LOCALS; slot++) {
+    if (snapshot[slot] != NULL) {
+      snapshot[slot](slots[slot]);
+    }
+  }
+  OPENSSL_free(slots);
+}
+
+/* Thread Termination Callbacks.
+ *
+ * Windows doesn't support a per-thread destructor with its TLS primitives, so
+ * we build one manually by registering a function that is called on each
+ * thread's exit. This magic is from http://www.codeproject.com/threads/tls.asp
+ * and it works for VC++ 7.0 and later.
+ *
+ * Force a reference to _tls_used to make the linker create the TLS directory
+ * if it's not already there (e.g. if __declspec(thread) is not used). Force a
+ * reference to p_thread_callback_base to prevent whole program optimization
+ * from discarding the variable. Both names gain a '_' when not _WIN64. */
+#ifdef _WIN64
+#pragma comment(linker, "/INCLUDE:_tls_used")
+#pragma comment(linker, "/INCLUDE:p_thread_callback_base")
+#else
+#pragma comment(linker, "/INCLUDE:__tls_used")
+#pragma comment(linker, "/INCLUDE:_p_thread_callback_base")
+#endif
+
+/* .CRT$XLA to .CRT$XLZ is an array of PIMAGE_TLS_CALLBACK pointers that are
+ * called automatically by the OS loader code (not the CRT) when the module is
+ * loaded and on thread creation. They are NOT called if the module has been
+ * loaded by a LoadLibrary() call. It must have implicitly been loaded at
+ * process startup.
+ *
+ * By implicitly loaded, I mean that it is directly referenced by the main EXE
+ * or by one of its dependent DLLs. Delay-loaded DLL doesn't count as being
+ * implicitly loaded.
+ *
+ * See VC\crt\src\tlssup.c for reference. */
+
+/* The linker must not discard p_thread_callback_base. (We force a reference
+ * to this variable with a linker /INCLUDE:symbol pragma to ensure that.) If
+ * this variable is discarded, |thread_local_destructor| will never be
+ * called. */
+#ifdef _WIN64
+
+/* .CRT section is merged with .rdata on x64 so it must be constant data. */
+#pragma const_seg(".CRT$XLC")
+/* When defining a const variable, it must have external linkage to be sure the
+ * linker doesn't discard it. */
+extern const PIMAGE_TLS_CALLBACK p_thread_callback_base;
+const PIMAGE_TLS_CALLBACK p_thread_callback_base = thread_local_destructor;
+/* Reset the default section. */
+#pragma const_seg()
+
+#else
+
+#pragma data_seg(".CRT$XLC")
+PIMAGE_TLS_CALLBACK p_thread_callback_base = thread_local_destructor;
+/* Reset the default section. */
+#pragma data_seg()
+
+#endif  /* _WIN64 */
+
+void *CRYPTO_get_thread_local(thread_local_data_t index) {
+  /* Make sure the TLS index exists before it is queried. */
+  CRYPTO_once(&g_thread_local_init_once, thread_local_init);
+  if (g_thread_local_failed) {
+    return NULL;
+  }
+
+  /* A thread that has never stored anything has no slot array yet, in which
+   * case there is nothing to look up. */
+  void **const slots = TlsGetValue(g_thread_local_key);
+  return slots != NULL ? slots[index] : NULL;
+}
+
+int CRYPTO_set_thread_local(thread_local_data_t index, void *value,
+                            thread_local_destructor_t destructor) {
+  void **slots = NULL;
+  CRYPTO_once(&g_thread_local_init_once, thread_local_init);
+  if (g_thread_local_failed) {
+    goto err;
+  }
+  slots = TlsGetValue(g_thread_local_key);
+  if (slots == NULL) {
+    /* First store on this thread: allocate and attach a zeroed slot array. */
+    slots = OPENSSL_malloc(NUM_OPENSSL_THREAD_LOCALS * sizeof(void *));
+    if (slots == NULL) {
+      goto err;
+    }
+    memset(slots, 0, NUM_OPENSSL_THREAD_LOCALS * sizeof(void *));
+    if (!TlsSetValue(g_thread_local_key, slots)) {
+      OPENSSL_free(slots);
+      goto err;
+    }
+  }
+
+  EnterCriticalSection(&g_destructors_lock);
+  g_destructors[index] = destructor;
+  LeaveCriticalSection(&g_destructors_lock);
+  slots[index] = value;
+  return 1;
+err: /* Every failure path hands |value| to |destructor| and reports zero. */
+  destructor(value);
+  return 0;
+}
+
+#endif  /* OPENSSL_WINDOWS */
diff --git a/util/all_tests.go b/util/all_tests.go
index ded798e..5927257 100644
--- a/util/all_tests.go
+++ b/util/all_tests.go
@@ -81,6 +81,7 @@
 	{"crypto/modes/gcm_test"},
 	{"crypto/pkcs8/pkcs12_test"},
 	{"crypto/rsa/rsa_test"},
+	{"crypto/thread_test"},
 	{"crypto/x509/pkcs7_test"},
 	{"crypto/x509v3/tab_test"},
 	{"crypto/x509v3/v3name_test"},
