Use a pool of |rand_state| objects.
Previously we used thread-local state objects in rand.c. However, for
applications with large numbers of threads, this can lead to excessive
memory usage.
This change causes us to maintain a mutex-protected pool of state
objects where the size of the pool equals the maximum concurrency of
|RAND_bytes|. This might lead to state objects bouncing between CPUs
more often, but should help the memory usage problem.
Change-Id: Ie83763d3bc139e64ac17bf7e015ad082b2f8a81a
Reviewed-on: https://boringssl-review.googlesource.com/29565
Commit-Queue: Adam Langley <agl@google.com>
CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
Reviewed-by: David Benjamin <davidben@google.com>
diff --git a/crypto/fipsmodule/delocate.h b/crypto/fipsmodule/delocate.h
index 065a21c..59effde 100644
--- a/crypto/fipsmodule/delocate.h
+++ b/crypto/fipsmodule/delocate.h
@@ -23,7 +23,7 @@
#if defined(BORINGSSL_FIPS) && !defined(OPENSSL_ASAN) && !defined(OPENSSL_MSAN)
#define DEFINE_BSS_GET(type, name) \
static type name __attribute__((used)); \
- type *name##_bss_get(void);
+ type *name##_bss_get(void) __attribute__((const));
// For FIPS builds we require that CRYPTO_ONCE_INIT be zero.
#define DEFINE_STATIC_ONCE(name) DEFINE_BSS_GET(CRYPTO_once_t, name)
// For FIPS builds we require that CRYPTO_STATIC_MUTEX_INIT be zero.
diff --git a/crypto/fipsmodule/rand/rand.c b/crypto/fipsmodule/rand/rand.c
index 3ec92e6..02e63bc 100644
--- a/crypto/fipsmodule/rand/rand.c
+++ b/crypto/fipsmodule/rand/rand.c
@@ -54,75 +54,6 @@
// continuous random number generator test in FIPS 140-2, section 4.9.2.
#define CRNGT_BLOCK_SIZE 16
-// rand_thread_state contains the per-thread state for the RNG.
-struct rand_thread_state {
- CTR_DRBG_STATE drbg;
- // calls is the number of generate calls made on |drbg| since it was last
- // (re)seeded. This is bound by |kReseedInterval|.
- unsigned calls;
- // last_block_valid is non-zero iff |last_block| contains data from
- // |CRYPTO_sysrand|.
- int last_block_valid;
-
-#if defined(BORINGSSL_FIPS)
- // last_block contains the previous block from |CRYPTO_sysrand|.
- uint8_t last_block[CRNGT_BLOCK_SIZE];
- // next and prev form a NULL-terminated, double-linked list of all states in
- // a process.
- struct rand_thread_state *next, *prev;
-#endif
-};
-
-#if defined(BORINGSSL_FIPS)
-// thread_states_list is the head of a linked-list of all |rand_thread_state|
-// objects in the process, one per thread. This is needed because FIPS requires
-// that they be zeroed on process exit, but thread-local destructors aren't
-// called when the whole process is exiting.
-DEFINE_BSS_GET(struct rand_thread_state *, thread_states_list);
-DEFINE_STATIC_MUTEX(thread_states_list_lock);
-
-static void rand_thread_state_clear_all(void) __attribute__((destructor));
-static void rand_thread_state_clear_all(void) {
- CRYPTO_STATIC_MUTEX_lock_write(thread_states_list_lock_bss_get());
- for (struct rand_thread_state *cur = *thread_states_list_bss_get();
- cur != NULL; cur = cur->next) {
- CTR_DRBG_clear(&cur->drbg);
- }
- // |thread_states_list_lock is deliberately left locked so that any threads
- // that are still running will hang if they try to call |RAND_bytes|.
-}
-#endif
-
-// rand_thread_state_free frees a |rand_thread_state|. This is called when a
-// thread exits.
-static void rand_thread_state_free(void *state_in) {
- struct rand_thread_state *state = state_in;
-
- if (state_in == NULL) {
- return;
- }
-
-#if defined(BORINGSSL_FIPS)
- CRYPTO_STATIC_MUTEX_lock_write(thread_states_list_lock_bss_get());
-
- if (state->prev != NULL) {
- state->prev->next = state->next;
- } else {
- *thread_states_list_bss_get() = state->next;
- }
-
- if (state->next != NULL) {
- state->next->prev = state->prev;
- }
-
- CRYPTO_STATIC_MUTEX_unlock_write(thread_states_list_lock_bss_get());
-
- CTR_DRBG_clear(&state->drbg);
-#endif
-
- OPENSSL_free(state);
-}
-
#if defined(OPENSSL_X86_64) && !defined(OPENSSL_NO_ASM) && \
!defined(BORINGSSL_UNSAFE_DETERMINISTIC_MODE)
@@ -172,9 +103,31 @@
#endif
+// rand_state contains an RNG state.
+struct rand_state {
+ CTR_DRBG_STATE drbg;
+ // next forms a NULL-terminated linked-list of all free |rand_state| objects.
+ struct rand_state *next;
+ // calls is the number of generate calls made on |drbg| since it was last
+ // (re)seeded. This is bound by |kReseedInterval|.
+ unsigned calls;
+
+#if defined(BORINGSSL_FIPS)
+ // next_all forms another NULL-terminated linked-list, this time of all
+ // |rand_state| objects that have been allocated including those that might
+ // currently be in use.
+ struct rand_state *next_all;
+ // last_block contains the previous block from |CRYPTO_sysrand|.
+ uint8_t last_block[CRNGT_BLOCK_SIZE];
+ // last_block_valid is non-zero iff |last_block| contains data from
+ // |CRYPTO_sysrand|.
+ int last_block_valid;
+#endif
+};
+
#if defined(BORINGSSL_FIPS)
-static void rand_get_seed(struct rand_thread_state *state,
+static void rand_get_seed(struct rand_state *state,
uint8_t seed[CTR_DRBG_ENTROPY_LEN]) {
if (!state->last_block_valid) {
if (!hwrand(state->last_block, sizeof(state->last_block))) {
@@ -223,7 +176,7 @@
#else
-static void rand_get_seed(struct rand_thread_state *state,
+static void rand_get_seed(struct rand_state *state,
uint8_t seed[CTR_DRBG_ENTROPY_LEN]) {
// If not in FIPS mode, we don't overread from the system entropy source and
// we don't depend only on the hardware RDRAND.
@@ -232,6 +185,97 @@
#endif
+// rand_state_free_list is a list of currently free, |rand_state| structures.
+// When a thread needs a |rand_state| it picks the head element of this list and
+// allocs a new one if the list is empty. Once it's finished, it pushes the
+// state back onto the front of the list.
+//
+// Previously we used a thread-local state but for processes with large numbers
+// of threads this can result in excessive memory usage. Since we don't free
+// |rand_state| objects, the number of objects in memory will eventually equal
+// the maximum concurrency of |RAND_bytes|.
+DEFINE_BSS_GET(struct rand_state *, rand_state_free_list);
+
+// rand_state_lock protects |rand_state_free_list| (and |rand_state_all_list|,
+// in FIPS mode).
+DEFINE_STATIC_MUTEX(rand_state_lock);
+
+#if defined(BORINGSSL_FIPS)
+// rand_state_all_list is the head of a linked-list of all |rand_state| objects
+// in the process. This is needed because FIPS requires that they be zeroed on
+// process exit.
+DEFINE_BSS_GET(struct rand_state *, rand_state_all_list);
+
+// rand_drbg_lock is taken in write mode by |rand_state_clear_all|, and
+// in read mode by any operation on the |drbg| member of |rand_state|.
+// This ensures that, in the event that a thread races destructor functions, we
+// never return bogus random data. At worst, the thread will deadlock.
+DEFINE_STATIC_MUTEX(rand_drbg_lock);
+
+static void rand_state_clear_all(void) __attribute__((destructor));
+static void rand_state_clear_all(void) {
+ CRYPTO_STATIC_MUTEX_lock_write(rand_drbg_lock_bss_get());
+ CRYPTO_STATIC_MUTEX_lock_write(rand_state_lock_bss_get());
+ for (struct rand_state *cur = *rand_state_all_list_bss_get();
+ cur != NULL; cur = cur->next_all) {
+ CTR_DRBG_clear(&cur->drbg);
+ }
+ // Both locks are deliberately left locked so that any threads that are still
+ // running will hang if they try to call |RAND_bytes|.
+}
+#endif
+
+// rand_state_init seeds a |rand_state|.
+static void rand_state_init(struct rand_state *state) {
+ OPENSSL_memset(state, 0, sizeof(struct rand_state));
+ uint8_t seed[CTR_DRBG_ENTROPY_LEN];
+ rand_get_seed(state, seed);
+ if (!CTR_DRBG_init(&state->drbg, seed, NULL, 0)) {
+ abort();
+ }
+}
+
+// rand_state_get pops a |rand_state| from the head of
+// |rand_state_free_list| and returns it. If the list is empty, it
+// creates a fresh |rand_state| and returns that instead.
+static struct rand_state *rand_state_get(void) {
+ struct rand_state *state = NULL;
+ CRYPTO_STATIC_MUTEX_lock_write(rand_state_lock_bss_get());
+ state = *rand_state_free_list_bss_get();
+ if (state != NULL) {
+ *rand_state_free_list_bss_get() = state->next;
+ }
+ CRYPTO_STATIC_MUTEX_unlock_write(rand_state_lock_bss_get());
+
+ if (state != NULL) {
+ return state;
+ }
+
+ state = OPENSSL_malloc(sizeof(struct rand_state));
+ if (state == NULL) {
+ return NULL;
+ }
+
+ rand_state_init(state);
+
+#if defined(BORINGSSL_FIPS)
+ CRYPTO_STATIC_MUTEX_lock_write(rand_state_lock_bss_get());
+ state->next_all = *rand_state_all_list_bss_get();
+ *rand_state_all_list_bss_get() = state;
+ CRYPTO_STATIC_MUTEX_unlock_write(rand_state_lock_bss_get());
+#endif
+
+ return state;
+}
+
+// rand_state_put pushes |state| onto |rand_state_free_list|.
+static void rand_state_put(struct rand_state *state) {
+ CRYPTO_STATIC_MUTEX_lock_write(rand_state_lock_bss_get());
+ state->next = *rand_state_free_list_bss_get();
+ *rand_state_free_list_bss_get() = state;
+ CRYPTO_STATIC_MUTEX_unlock_write(rand_state_lock_bss_get());
+}
+
void RAND_bytes_with_additional_data(uint8_t *out, size_t out_len,
const uint8_t user_additional_data[32]) {
if (out_len == 0) {
@@ -259,41 +303,14 @@
additional_data[i] ^= user_additional_data[i];
}
- struct rand_thread_state stack_state;
- struct rand_thread_state *state =
- CRYPTO_get_thread_local(OPENSSL_THREAD_LOCAL_RAND);
+ struct rand_state stack_state;
+ struct rand_state *state = rand_state_get();
if (state == NULL) {
- state = OPENSSL_malloc(sizeof(struct rand_thread_state));
- if (state == NULL ||
- !CRYPTO_set_thread_local(OPENSSL_THREAD_LOCAL_RAND, state,
- rand_thread_state_free)) {
- // If the system is out of memory, use an ephemeral state on the
- // stack.
- state = &stack_state;
- }
-
- state->last_block_valid = 0;
- uint8_t seed[CTR_DRBG_ENTROPY_LEN];
- rand_get_seed(state, seed);
- if (!CTR_DRBG_init(&state->drbg, seed, NULL, 0)) {
- abort();
- }
- state->calls = 0;
-
-#if defined(BORINGSSL_FIPS)
- if (state != &stack_state) {
- CRYPTO_STATIC_MUTEX_lock_write(thread_states_list_lock_bss_get());
- struct rand_thread_state **states_list = thread_states_list_bss_get();
- state->next = *states_list;
- if (state->next != NULL) {
- state->next->prev = state;
- }
- state->prev = NULL;
- *states_list = state;
- CRYPTO_STATIC_MUTEX_unlock_write(thread_states_list_lock_bss_get());
- }
-#endif
+ // If the system is out of memory, use an ephemeral state on the
+ // stack.
+ state = &stack_state;
+ rand_state_init(state);
}
if (state->calls >= kReseedInterval) {
@@ -302,13 +319,13 @@
#if defined(BORINGSSL_FIPS)
// Take a read lock around accesses to |state->drbg|. This is needed to
// avoid returning bad entropy if we race with
- // |rand_thread_state_clear_all|.
+ // |rand_state_clear_all|.
//
// This lock must be taken after any calls to |CRYPTO_sysrand| to avoid a
// bug on ppc64le. glibc may implement pthread locks by wrapping user code
// in a hardware transaction, but, on some older versions of glibc and the
// kernel, syscalls made with |syscall| did not abort the transaction.
- CRYPTO_STATIC_MUTEX_lock_read(thread_states_list_lock_bss_get());
+ CRYPTO_STATIC_MUTEX_lock_read(rand_drbg_lock_bss_get());
#endif
if (!CTR_DRBG_reseed(&state->drbg, seed, NULL, 0)) {
abort();
@@ -316,7 +333,7 @@
state->calls = 0;
} else {
#if defined(BORINGSSL_FIPS)
- CRYPTO_STATIC_MUTEX_lock_read(thread_states_list_lock_bss_get());
+ CRYPTO_STATIC_MUTEX_lock_read(rand_drbg_lock_bss_get());
#endif
}
@@ -343,8 +360,12 @@
}
#if defined(BORINGSSL_FIPS)
- CRYPTO_STATIC_MUTEX_unlock_read(thread_states_list_lock_bss_get());
+ CRYPTO_STATIC_MUTEX_unlock_read(rand_drbg_lock_bss_get());
#endif
+
+ if (state != &stack_state) {
+ rand_state_put(state);
+ }
}
int RAND_bytes(uint8_t *out, size_t out_len) {
diff --git a/crypto/internal.h b/crypto/internal.h
index 3dde476..c4e2e51 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -528,7 +528,6 @@
// stored.
typedef enum {
OPENSSL_THREAD_LOCAL_ERR = 0,
- OPENSSL_THREAD_LOCAL_RAND,
OPENSSL_THREAD_LOCAL_TEST,
NUM_OPENSSL_THREAD_LOCALS,
} thread_local_data_t;