rand: new-style locking and support rdrand.

Pure /dev/urandom, no buffering (previous behaviour):
Did 2320000 RNG (16 bytes) operations in 3000082us (773312.2 ops/sec): 12.4 MB/s
Did 209000 RNG (256 bytes) operations in 3011984us (69389.5 ops/sec): 17.8 MB/s
Did 6851 RNG (8192 bytes) operations in 3052027us (2244.7 ops/sec): 18.4 MB/s

Pure rdrand speed:
Did 34930500 RNG (16 bytes) operations in 3000021us (11643418.5 ops/sec): 186.3 MB/s
Did 2444000 RNG (256 bytes) operations in 3000164us (814622.1 ops/sec): 208.5 MB/s
Did 80000 RNG (8192 bytes) operations in 3020968us (26481.6 ops/sec): 216.9 MB/s

rdrand + ChaCha (as in this change):
Did 19498000 RNG (16 bytes) operations in 3000086us (6499147.0 ops/sec): 104.0 MB/s
Did 1964000 RNG (256 bytes) operations in 3000566us (654543.2 ops/sec): 167.6 MB/s
Did 62000 RNG (8192 bytes) operations in 3034090us (20434.5 ops/sec): 167.4 MB/s

Change-Id: Ie17045650cfe75858e4498ac28dbc4dcf8338376
Reviewed-on: https://boringssl-review.googlesource.com/4328
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/internal.h b/crypto/internal.h index 6a8d5b2..9c5d487 100644 --- a/crypto/internal.h +++ b/crypto/internal.h
@@ -434,6 +434,7 @@ * stored. */ typedef enum { OPENSSL_THREAD_LOCAL_ERR = 0, + OPENSSL_THREAD_LOCAL_RAND, OPENSSL_THREAD_LOCAL_TEST, NUM_OPENSSL_THREAD_LOCALS, } thread_local_data_t;
diff --git a/crypto/rand/CMakeLists.txt b/crypto/rand/CMakeLists.txt index 23c1b24..374d8f1 100644 --- a/crypto/rand/CMakeLists.txt +++ b/crypto/rand/CMakeLists.txt
@@ -1,5 +1,13 @@ include_directories(. .. ../../include) +if (${ARCH} STREQUAL "x86_64") + set( + RAND_ARCH_SOURCES + + rdrand-x86_64.${ASM_EXT} + ) +endif() + add_library( rand @@ -8,4 +16,9 @@ rand.c urandom.c windows.c + hwrand.c + + ${RAND_ARCH_SOURCES} ) + +perlasm(rdrand-x86_64.${ASM_EXT} asm/rdrand-x86_64.pl)
diff --git a/crypto/rand/asm/rdrand-x86_64.pl b/crypto/rand/asm/rdrand-x86_64.pl new file mode 100644 index 0000000..a917611 --- /dev/null +++ b/crypto/rand/asm/rdrand-x86_64.pl
@@ -0,0 +1,46 @@
+#!/usr/bin/env perl
+
+# Emits CRYPTO_rdrand, a C-callable wrapper around the x86-64 rdrand
+# instruction. The assembly below is piped through x86_64-xlate.pl, which
+# translates it for the requested flavour.
+
+$flavour = shift;
+$output = shift;
+# A first argument containing a '.' is treated as the output file name, with
+# no flavour given.
+if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
+
+# x86_64-xlate.pl is looked up relative to the directory of this script.
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
+die "can't locate x86_64-xlate.pl";
+
+# From here on, everything printed to STDOUT goes through the translator.
+open OUT,"| \"$^X\" $xlate $flavour $output";
+*STDOUT=*OUT;
+
+print<<___;
+.text
+
+# uint64_t CRYPTO_rdrand(void);
+#
+# Returns 64 bits of rdrand output in %rax. rdrand signals success by setting
+# the carry flag and can transiently fail, so it is retried up to ten times.
+# The C prototype cannot report an error, so persistent failure traps (ud2)
+# rather than returning a value that is not random.
+.globl	CRYPTO_rdrand
+.type	CRYPTO_rdrand,\@function,1
+.align	16
+CRYPTO_rdrand:
+	movl	\$10, %ecx
+.Lrdrand_retry:
+	.byte 0x48, 0x0f, 0xc7, 0xf0	# rdrand %rax
+	jc	.Lrdrand_done
+	decl	%ecx
+	jnz	.Lrdrand_retry
+	ud2
+.Lrdrand_done:
+	retq
+___
+
+close STDOUT;	# flush
diff --git a/crypto/rand/hwrand.c b/crypto/rand/hwrand.c new file mode 100644 index 0000000..0d2833a --- /dev/null +++ b/crypto/rand/hwrand.c
@@ -0,0 +1,74 @@
+/* Copyright (c) 2015, Google Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
+
+#include <openssl/rand.h>
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <openssl/cpu.h>
+
+#include "internal.h"
+
+
+#if defined(OPENSSL_X86_64)
+
+/* CRYPTO_have_hwrand returns one if bit 30 of |OPENSSL_ia32cap_P[1]| is set,
+ * which this file takes to mean that the rdrand instruction is available, and
+ * zero otherwise. */
+int CRYPTO_have_hwrand(void) {
+  return (OPENSSL_ia32cap_P[1] & (1u << 30)) != 0;
+}
+
+/* CRYPTO_rdrand is defined in asm/rdrand-x86_64.pl. It takes no arguments and
+ * returns 64 bits of rdrand output. */
+extern uint64_t CRYPTO_rdrand(void);
+
+/* CRYPTO_hwrand fills |len| bytes at |buf| with output from |CRYPTO_rdrand|,
+ * one 64-bit word at a time. */
+void CRYPTO_hwrand(uint8_t *buf, size_t len) {
+  uint64_t word;
+
+  /* Copy out whole 64-bit words while at least one still fits. */
+  while (len >= sizeof(word)) {
+    word = CRYPTO_rdrand();
+    memcpy(buf, &word, sizeof(word));
+    buf += sizeof(word);
+    len -= sizeof(word);
+  }
+
+  /* Any tail shorter than a word is served from the front of one more word. */
+  if (len > 0) {
+    word = CRYPTO_rdrand();
+    memcpy(buf, &word, len);
+  }
+}
+
+#else
+
+/* CRYPTO_have_hwrand always returns zero: no hardware RNG is used on this
+ * platform. */
+int CRYPTO_have_hwrand(void) {
+  return 0;
+}
+
+/* CRYPTO_hwrand aborts: |CRYPTO_have_hwrand| is zero here, so no caller should
+ * ever reach it. */
+void CRYPTO_hwrand(uint8_t *buf, size_t len) {
+  (void)buf;
+  (void)len;
+  abort();
+}
+
+#endif
diff --git a/crypto/rand/internal.h b/crypto/rand/internal.h new file mode 100644 index 0000000..1cca7f3 --- /dev/null +++ b/crypto/rand/internal.h
@@ -0,0 +1,44 @@
+/* Copyright (c) 2015, Google Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
+
+#ifndef OPENSSL_HEADER_CRYPTO_RAND_INTERNAL_H
+#define OPENSSL_HEADER_CRYPTO_RAND_INTERNAL_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+
+/* CRYPTO_sysrand fills |len| bytes at |buf| with entropy from the operating
+ * system. It returns nothing and so cannot report failure; it is up to the
+ * implementation to abort the process if the operating system source fails. */
+void CRYPTO_sysrand(uint8_t *buf, size_t len);
+
+/* CRYPTO_have_hwrand returns one iff |CRYPTO_hwrand| can be called to generate
+ * hardware entropy. */
+int CRYPTO_have_hwrand(void);
+
+/* CRYPTO_hwrand fills |len| bytes at |buf| with entropy from the hardware.
+ * This function can only be called if |CRYPTO_have_hwrand| returns one. */
+void CRYPTO_hwrand(uint8_t *buf, size_t len);
+
+
+#if defined(__cplusplus)
+}  /* extern C */
+#endif
+
+#endif  /* OPENSSL_HEADER_CRYPTO_RAND_INTERNAL_H */
diff --git a/crypto/rand/rand.c b/crypto/rand/rand.c index 6780b6c..5f94a15 100644 --- a/crypto/rand/rand.c +++ b/crypto/rand/rand.c
@@ -14,6 +14,154 @@
 
 #include <openssl/rand.h>
 
+#include <string.h>
+
+#include <openssl/mem.h>
+
+#include "internal.h"
+#include "../internal.h"
+
+
+/* It's assumed that the operating system always has an unfailing source of
+ * entropy which is accessed via |CRYPTO_sysrand|. (If the operating system
+ * entropy source fails, it's up to |CRYPTO_sysrand| to abort the process—we
+ * don't try to handle it.)
+ *
+ * In addition, the hardware may provide a low-latency RNG. Intel's rdrand
+ * instruction is the canonical example of this. When a hardware RNG is
+ * available we don't need to worry about an RNG failure arising from fork()ing
+ * the process or moving a VM, so we can keep thread-local RNG state and XOR
+ * the hardware entropy in.
+ *
+ * (We assume that the OS entropy is safe from fork()ing and VM duplication.
+ * This might be a bit of a leap of faith, esp on Windows, but there's nothing
+ * that we can do about it.) */
+
+/* rand_thread_state contains the per-thread state for the RNG. This is only
+ * used if the system has support for a hardware RNG. */
+struct rand_thread_state {
+  /* key is the ChaCha20 key, reloaded from |CRYPTO_sysrand| on refresh. */
+  uint8_t key[32];
+  /* calls_used is incremented as requests are served since the last refresh.
+   * Its bytes are also passed to ChaCha20 as the nonce. */
+  uint64_t calls_used;
+  /* bytes_used is the number of bytes returned since the last refresh. */
+  size_t bytes_used;
+  /* partial_block buffers keystream for requests shorter than one block. */
+  uint8_t partial_block[64];
+  /* partial_block_used is the number of bytes of |partial_block| consumed. */
+  unsigned partial_block_used;
+};
+
+/* kMaxCallsPerRefresh is the maximum number of |RAND_bytes| calls that we'll
+ * serve before reading a new key from the operating system. This only applies
+ * if we have a hardware RNG. */
+static const unsigned kMaxCallsPerRefresh = 1024;
+
+/* kMaxBytesPerRefresh is the maximum number of bytes that we'll return from
+ * |RAND_bytes| before reading a new key from the operating system. This only
+ * applies if we have a hardware RNG. */
+static const uint64_t kMaxBytesPerRefresh = 1024 * 1024;
+
+/* rand_thread_state_free frees a |rand_thread_state|. This is called when a
+ * thread exits. */
+static void rand_thread_state_free(void *state) {
+  if (state == NULL) {
+    return;
+  }
+
+  OPENSSL_cleanse(state, sizeof(struct rand_thread_state));
+  OPENSSL_free(state);
+}
+
+/* CRYPTO_chacha_20 is defined elsewhere in the library. Below it is always
+ * called with |out| equal to |in|, transforming the buffer in place. */
+extern void CRYPTO_chacha_20(uint8_t *out, const uint8_t *in, size_t in_len,
+                             const uint8_t key[32], const uint8_t nonce[8],
+                             size_t counter);
+
+/* RAND_bytes writes |len| bytes of random data to |buf| and always returns
+ * one. Without a hardware RNG the bytes come straight from |CRYPTO_sysrand|.
+ * Otherwise |buf| is filled by |CRYPTO_hwrand| and then passed through ChaCha20
+ * keyed from per-thread state that is refreshed from |CRYPTO_sysrand|. */
+int RAND_bytes(uint8_t *buf, const size_t len) {
+  if (len == 0) {
+    return 1;
+  }
+
+  if (!CRYPTO_have_hwrand()) {
+    /* Without a hardware RNG to save us from address-space duplication, the OS
+     * entropy is used directly. */
+    CRYPTO_sysrand(buf, len);
+    return 1;
+  }
+
+  struct rand_thread_state *state =
+      CRYPTO_get_thread_local(OPENSSL_THREAD_LOCAL_RAND);
+  if (state == NULL) {
+    state = OPENSSL_malloc(sizeof(struct rand_thread_state));
+    if (state == NULL ||
+        !CRYPTO_set_thread_local(OPENSSL_THREAD_LOCAL_RAND, state,
+                                 rand_thread_state_free)) {
+      /* No per-thread state is available, so fall back to the OS. */
+      CRYPTO_sysrand(buf, len);
+      return 1;
+    }
+
+    /* |partial_block| is transformed in place to produce keystream, so it
+     * must start from a defined value rather than whatever the allocator
+     * returned. */
+    memset(state->partial_block, 0, sizeof(state->partial_block));
+    /* Force a refresh below, which initialises the remaining fields. */
+    state->calls_used = kMaxCallsPerRefresh;
+  }
+
+  if (state->calls_used >= kMaxCallsPerRefresh ||
+      state->bytes_used >= kMaxBytesPerRefresh) {
+    CRYPTO_sysrand(state->key, sizeof(state->key));
+    state->calls_used = 0;
+    state->bytes_used = 0;
+    /* Mark the buffered block as exhausted so that it is regenerated with the
+     * new key before being used again. */
+    state->partial_block_used = sizeof(state->partial_block);
+  }
+
+  CRYPTO_hwrand(buf, len);
+
+  if (len >= sizeof(state->partial_block)) {
+    size_t remaining = len;
+    while (remaining > 0) {
+      /* kMaxBytesPerCall is only 2GB, while ChaCha can handle 256GB. But this
+       * is sufficient and easier on 32-bit. */
+      static const size_t kMaxBytesPerCall = 0x80000000;
+      size_t todo = remaining;
+      if (todo > kMaxBytesPerCall) {
+        todo = kMaxBytesPerCall;
+      }
+      CRYPTO_chacha_20(buf, buf, todo, state->key,
+                       (uint8_t *)&state->calls_used, 0);
+      buf += todo;
+      remaining -= todo;
+      state->calls_used++;
+    }
+  } else {
+    if (sizeof(state->partial_block) - state->partial_block_used < len) {
+      CRYPTO_chacha_20(state->partial_block, state->partial_block,
+                       sizeof(state->partial_block), state->key,
+                       (uint8_t *)&state->calls_used, 0);
+      state->partial_block_used = 0;
+    }
+
+    unsigned i;
+    for (i = 0; i < len; i++) {
+      buf[i] ^= state->partial_block[state->partial_block_used++];
+    }
+    state->calls_used++;
+  }
+  state->bytes_used += len;
+
+  return 1;
+}
 
 int RAND_pseudo_bytes(uint8_t *buf, size_t len) {
   return RAND_bytes(buf, len);
diff --git a/crypto/rand/urandom.c b/crypto/rand/urandom.c index 05043fe..788a979 100644 --- a/crypto/rand/urandom.c +++ b/crypto/rand/urandom.c
@@ -25,6 +25,9 @@ #include <openssl/thread.h> #include <openssl/mem.h> +#include "internal.h" +#include "../internal.h" + /* This file implements a PRNG by reading from /dev/urandom, optionally with a * fork-safe buffer. @@ -72,20 +75,22 @@ /* rand_bytes_per_buf is the number of actual entropy bytes in a buffer. */ static const size_t rand_bytes_per_buf = BUF_SIZE - sizeof(struct rand_buffer); +static struct CRYPTO_STATIC_MUTEX global_lock = CRYPTO_STATIC_MUTEX_INIT; + /* list_head is the start of a global, linked-list of rand_buffer objects. It's - * protected by CRYPTO_LOCK_RAND. */ + * protected by |global_lock|. */ static struct rand_buffer *list_head; /* urandom_fd is a file descriptor to /dev/urandom. It's protected by - * CRYPTO_LOCK_RAND. */ + * |global_lock|. */ static int urandom_fd = -2; /* urandom_buffering controls whether buffering is enabled (1) or not (0). This - * is protected by CRYPTO_LOCK_RAND. */ + * is protected by |global_lock|. */ static int urandom_buffering = 0; /* urandom_get_fd_locked returns a file descriptor to /dev/urandom. The caller - * of this function must hold CRYPTO_LOCK_RAND. */ + * of this function must hold |global_lock|. */ static int urandom_get_fd_locked(void) { if (urandom_fd != -2) { return urandom_fd; @@ -100,7 +105,7 @@ void RAND_cleanup(void) { struct rand_buffer *cur; - CRYPTO_w_lock(CRYPTO_LOCK_RAND); + CRYPTO_STATIC_MUTEX_lock_write(&global_lock); while ((cur = list_head)) { list_head = cur->next; OPENSSL_free(cur); @@ -110,7 +115,7 @@ } urandom_fd = -2; list_head = NULL; - CRYPTO_w_unlock(CRYPTO_LOCK_RAND); + CRYPTO_STATIC_MUTEX_unlock(&global_lock); } /* read_full reads exactly |len| bytes from |fd| into |out| and returns 1. In @@ -133,36 +138,34 @@ return 1; } -/* urandom_rand_pseudo_bytes puts |num| random bytes into |out|. It returns - * one on success and zero otherwise. */ -int RAND_bytes(uint8_t *out, size_t requested) { +/* CRYPTO_sysrand puts |num| random bytes into |out|. 
*/ +void CRYPTO_sysrand(uint8_t *out, size_t requested) { int fd; struct rand_buffer *buf; size_t todo; pid_t pid, ppid; if (requested == 0) { - return 1; + return; } - CRYPTO_w_lock(CRYPTO_LOCK_RAND); + CRYPTO_STATIC_MUTEX_lock_write(&global_lock); fd = urandom_get_fd_locked(); if (fd < 0) { - CRYPTO_w_unlock(CRYPTO_LOCK_RAND); + CRYPTO_STATIC_MUTEX_unlock(&global_lock); abort(); - return 0; + return; } /* If buffering is not enabled, or if the request is large, then the * result comes directly from urandom. */ if (!urandom_buffering || requested > BUF_SIZE / 2) { - CRYPTO_w_unlock(CRYPTO_LOCK_RAND); + CRYPTO_STATIC_MUTEX_unlock(&global_lock); if (!read_full(fd, out, requested)) { abort(); - return 0; } - return 1; + return; } pid = getpid(); @@ -174,8 +177,8 @@ rand_bytes_per_buf - buf->used >= requested) { memcpy(out, &buf->rand[buf->used], requested); buf->used += requested; - CRYPTO_w_unlock(CRYPTO_LOCK_RAND); - return 1; + CRYPTO_STATIC_MUTEX_unlock(&global_lock); + return; } /* If we don't immediately have enough entropy with the correct @@ -184,13 +187,13 @@ if (buf) { list_head = buf->next; } - CRYPTO_w_unlock(CRYPTO_LOCK_RAND); + CRYPTO_STATIC_MUTEX_unlock(&global_lock); if (!buf) { buf = (struct rand_buffer *)OPENSSL_malloc(BUF_SIZE); if (!buf) { abort(); - return 0; + return; } /* The buffer doesn't contain any random bytes yet * so we mark it as fully used so that it will be @@ -208,7 +211,7 @@ /* We have forked and so cannot use these bytes as they * may have been used in another process. 
*/ OPENSSL_free(buf); - CRYPTO_w_lock(CRYPTO_LOCK_RAND); + CRYPTO_STATIC_MUTEX_lock_write(&global_lock); } while (requested > 0) { @@ -228,18 +231,17 @@ if (!read_full(fd, buf->rand, rand_bytes_per_buf)) { OPENSSL_free(buf); abort(); - return 0; + return; } buf->used = 0; } - CRYPTO_w_lock(CRYPTO_LOCK_RAND); + CRYPTO_STATIC_MUTEX_lock_write(&global_lock); assert(list_head != buf); buf->next = list_head; list_head = buf; - CRYPTO_w_unlock(CRYPTO_LOCK_RAND); - return 1; + CRYPTO_STATIC_MUTEX_unlock(&global_lock); } #endif /* !OPENSSL_WINDOWS */
diff --git a/crypto/rand/windows.c b/crypto/rand/windows.c index 66d977f..7bfcb1d 100644 --- a/crypto/rand/windows.c +++ b/crypto/rand/windows.c
@@ -32,11 +32,13 @@ #pragma warning(pop) +#include "internal.h" + void RAND_cleanup(void) { } -int RAND_bytes(uint8_t *out, size_t requested) { +void CRYPTO_sysrand(uint8_t *out, size_t requested) { while (requested > 0) { ULONG output_bytes_this_pass = ULONG_MAX; if (requested < output_bytes_this_pass) { @@ -48,7 +50,7 @@ requested -= output_bytes_this_pass; out += output_bytes_this_pass; } - return 1; + return; } #endif /* OPENSSL_WINDOWS */
diff --git a/include/openssl/rand.h b/include/openssl/rand.h index 8e3bc30..0b2ead8 100644 --- a/include/openssl/rand.h +++ b/include/openssl/rand.h
@@ -25,8 +25,7 @@ /* Random number generation. */ -/* RAND_bytes writes |len| bytes of random data to |buf|. It returns one on - * success and zero on otherwise. */ +/* RAND_bytes writes |len| bytes of random data to |buf| and returns one. */ OPENSSL_EXPORT int RAND_bytes(uint8_t *buf, size_t len); /* RAND_cleanup frees any resources used by the RNG. This is not safe if other