Fix RAND_enable_fork_unsafe_buffering when called after fork

If a process calls fork(), then the child process never forks again, the
child may wish to call RAND_enable_fork_unsafe_buffering(). However,
doing so exposes a bug: we assume that, if the flag is set, we don't
need to worry about fork-safety. But it is possible that the PRNG state
was cloned from another process which does not work.

Concretely, consider a zygote process, e.g. Chromium's. A zygote process
would retain fork-safety, but pass along its PRNG state to each of its
children. If the children never fork, they might disable fork-safety,
hitting this bug. (Chromium does not call this API. This is just a
hypothetical scenario.)

Fix this by reseeding whenever the fork-safety bit changes. This fix
does not strictly depend on the atomics work, but it causes us to
unconditionally sample rand_fork_unsafe_buffering_enabled(). This no
longer causes contention because it's just another atomic load.

This only affects systems without MADV_WIPEONFORK and without fast
RDRAND. If RDRAND is fast, we're always fork-safe and MADV_WIPEONFORK
allows us to efficiently detect forks.

Cq-Include-Trybots: luci.boringssl.try:linux_clang_rel_tsan
Change-Id: I6d0c471c62c951254faf85420a7dc3f4a9d65ee0
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/59850
Commit-Queue: David Benjamin <davidben@google.com>
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/fipsmodule/rand/rand.c b/crypto/fipsmodule/rand/rand.c
index bf6b046..a3fc688 100644
--- a/crypto/fipsmodule/rand/rand.c
+++ b/crypto/fipsmodule/rand/rand.c
@@ -65,6 +65,9 @@
   // last_block_valid is non-zero iff |last_block| contains data from
   // |get_seed_entropy|.
   int last_block_valid;
+  // fork_unsafe_buffering is non-zero iff, when |drbg| was last (re)seeded,
+  // fork-unsafe buffering was enabled.
+  int fork_unsafe_buffering;
 
 #if defined(BORINGSSL_FIPS)
   // last_block contains the previous block from |get_seed_entropy|.
@@ -331,6 +334,7 @@
   }
 
   const uint64_t fork_generation = CRYPTO_get_fork_generation();
+  const int fork_unsafe_buffering = rand_fork_unsafe_buffering_enabled();
 
   // Additional data is mixed into every CTR-DRBG call to protect, as best we
   // can, against forks & VM clones. We do not over-read this information and
@@ -345,7 +349,7 @@
     // entropy is used. This can be expensive (one read per |RAND_bytes| call)
     // and so is disabled when we have fork detection, or if the application has
     // promised not to fork.
-    if (fork_generation != 0 || rand_fork_unsafe_buffering_enabled()) {
+    if (fork_generation != 0 || fork_unsafe_buffering) {
       OPENSSL_memset(additional_data, 0, sizeof(additional_data));
     } else if (!have_rdrand()) {
       // No alternative so block for OS entropy.
@@ -388,6 +392,7 @@
     }
     state->calls = 0;
     state->fork_generation = fork_generation;
+    state->fork_unsafe_buffering = fork_unsafe_buffering;
 
 #if defined(BORINGSSL_FIPS)
     CRYPTO_MUTEX_init(&state->clear_drbg_lock);
@@ -406,7 +411,14 @@
   }
 
   if (state->calls >= kReseedInterval ||
-      state->fork_generation != fork_generation) {
+      // If we've forked since |state| was last seeded, reseed.
+      state->fork_generation != fork_generation ||
+      // If |state| was seeded from a state with different fork-safety
+      // preferences, reseed. Suppose |state| was fork-safe, then forked into
+      // two children, but each of the children never fork and disable fork
+      // safety. The children must reseed to avoid working from the same PRNG
+      // state.
+      state->fork_unsafe_buffering != fork_unsafe_buffering) {
     uint8_t seed[CTR_DRBG_ENTROPY_LEN];
     uint8_t reseed_additional_data[CTR_DRBG_ENTROPY_LEN] = {0};
     size_t reseed_additional_data_len = 0;
@@ -424,6 +436,7 @@
     }
     state->calls = 0;
     state->fork_generation = fork_generation;
+    state->fork_unsafe_buffering = fork_unsafe_buffering;
   } else {
 #if defined(BORINGSSL_FIPS)
     CRYPTO_MUTEX_lock_read(&state->clear_drbg_lock);
diff --git a/crypto/rand_extra/rand_test.cc b/crypto/rand_extra/rand_test.cc
index 2ed1deb..1af28b0 100644
--- a/crypto/rand_extra/rand_test.cc
+++ b/crypto/rand_extra/rand_test.cc
@@ -56,7 +56,7 @@
 
 #if !defined(OPENSSL_WINDOWS) && !defined(OPENSSL_IOS) && \
     !defined(OPENSSL_FUCHSIA) && !defined(BORINGSSL_UNSAFE_DETERMINISTIC_MODE)
-static bool ForkAndRand(bssl::Span<uint8_t> out) {
+static bool ForkAndRand(bssl::Span<uint8_t> out, bool fork_unsafe_buffering) {
   int pipefds[2];
   if (pipe(pipefds) < 0) {
     perror("pipe");
@@ -76,6 +76,9 @@
   if (child == 0) {
     // This is the child. Generate entropy and write it to the parent.
     close(pipefds[0]);
+    if (fork_unsafe_buffering) {
+      RAND_enable_fork_unsafe_buffering(-1);
+    }
     RAND_bytes(out.data(), out.size());
     while (!out.empty()) {
       ssize_t ret = write(pipefds[1], out.data(), out.size());
@@ -136,18 +139,27 @@
   // intentionally uses smaller buffers than the others, to minimize the chance
   // of sneaking by with a large enough buffer that we've since reseeded from
   // the OS.
-  uint8_t buf1[16], buf2[16], buf3[16];
-  ASSERT_TRUE(ForkAndRand(buf1));
-  ASSERT_TRUE(ForkAndRand(buf2));
-  RAND_bytes(buf3, sizeof(buf3));
+  //
+  // All child processes should have different PRNGs, including the ones that
+  // disavow fork-safety. Although they are produced by fork, they themselves do
+  // not fork after that call.
+  uint8_t bufs[5][16];
+  ASSERT_TRUE(ForkAndRand(bufs[0], /*fork_unsafe_buffering=*/false));
+  ASSERT_TRUE(ForkAndRand(bufs[1], /*fork_unsafe_buffering=*/false));
+  ASSERT_TRUE(ForkAndRand(bufs[2], /*fork_unsafe_buffering=*/true));
+  ASSERT_TRUE(ForkAndRand(bufs[3], /*fork_unsafe_buffering=*/true));
+  RAND_bytes(bufs[4], sizeof(bufs[4]));
 
-  // All should be different.
-  EXPECT_NE(Bytes(buf1), Bytes(buf2));
-  EXPECT_NE(Bytes(buf2), Bytes(buf3));
-  EXPECT_NE(Bytes(buf1), Bytes(buf3));
-  EXPECT_NE(Bytes(buf1), Bytes(kZeros));
-  EXPECT_NE(Bytes(buf2), Bytes(kZeros));
-  EXPECT_NE(Bytes(buf3), Bytes(kZeros));
+  // All should be different and non-zero.
+  for (const auto &buf : bufs) {
+    EXPECT_NE(Bytes(buf), Bytes(kZeros));
+  }
+  for (size_t i = 0; i < OPENSSL_ARRAY_SIZE(bufs); i++) {
+    for (size_t j = 0; j < i; j++) {
+      EXPECT_NE(Bytes(bufs[i]), Bytes(bufs[j]))
+          << "buffers " << i << " and " << j << " matched";
+    }
+  }
 }
 #endif  // !OPENSSL_WINDOWS && !OPENSSL_IOS &&
         // !OPENSSL_FUCHSIA && !BORINGSSL_UNSAFE_DETERMINISTIC_MODE