Fix non-blocking connect completion check in BIO.

Previously, we polled getsockopt(SO_ERROR) to check if a non-blocking connect
had completed, but this isn't right. SO_ERROR is only set *after* the
connect succeeded or failed.

Until then, you have to wait until the socket is writable. Per
connect(2) on Linux:

> The socket is nonblocking and the connection cannot be completed
> immediately.  (UNIX domain sockets failed with EAGAIN instead.)  It
> is  possible to select(2) or poll(2) for completion by selecting the
> socket for writing.  After select(2) indicates writability, use
> getsockopt(2) to read the SO_ERROR option at level SOL_SOCKET to
> determine whether connect()  completed  successfully  (SO_ERROR  is
> zero) or unsuccessfully (SO_ERROR is one of the usual error codes
> listed here, explaining the reason for the failure).

Windows documentation says something similar:

> With a nonblocking socket, the connection attempt cannot be completed
> immediately. In this case, connect will return SOCKET_ERROR, and
> WSAGetLastError will return WSAEWOULDBLOCK. In this case, there are
> three possible scenarios:
>
> * Use the select function to determine the completion of the
>   connection request by checking to see if the socket is writable.

https://learn.microsoft.com/en-us/windows/win32/api/winsock2/nf-winsock2-connect

This means that, even though BIO is generally not responsible for
calling select/poll, we need to do it here. Use poll on POSIX because
select cannot monitor fds beyond FD_SETSIZE. (Winsock's select does not
have this limitation. nfds is ignored.)

As part of this, remove the weird zero return from conn_state when
connect fails the second time around. It would make more sense to behave
the same between the two, and this actually would have caused BIO_read
to mistakenly report EOF when implicitly driving the connect state. No
one could have been relying on this because this code didn't work
anyway. To that end, it seems unlikely anyone is using non-blocking
connect BIOs because they don't work at all.

Change-Id: Iaf4c58920e686eadcfbb215f43cf9d90fa35f56d
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/96189
Auto-Submit: David Benjamin <davidben@google.com>
Commit-Queue: David Benjamin <davidben@google.com>
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/bio/bio_test.cc b/crypto/bio/bio_test.cc
index 3c01952..8aa41cb 100644
--- a/crypto/bio/bio_test.cc
+++ b/crypto/bio/bio_test.cc
@@ -196,23 +196,32 @@
   // there's an issue.
   static const int kTimeoutSeconds = 5;
 #if defined(OPENSSL_WINDOWS)
-  fd_set read_set, write_set;
+  fd_set read_set, write_set, except_set;
   FD_ZERO(&read_set);
   FD_ZERO(&write_set);
+  FD_ZERO(&except_set);
   fd_set *wait_set = wait_type == WaitType::kRead ? &read_set : &write_set;
   FD_SET(sock, wait_set);
+  FD_SET(sock, &except_set);
   timeval timeout;
   timeout.tv_sec = kTimeoutSeconds;
   timeout.tv_usec = 0;
-  if (select(0 /* unused on Windows */, &read_set, &write_set, nullptr,
+  if (select(0 /* unused on Windows */, &read_set, &write_set, &except_set,
              &timeout) <= 0) {
     return false;
   }
-  return FD_ISSET(sock, wait_set);
+  return FD_ISSET(sock, wait_set) || FD_ISSET(sock, &except_set);
 #else
-  short events = wait_type == WaitType::kRead ? POLLIN : POLLOUT;
-  pollfd fd = {/*fd=*/sock, events, /*revents=*/0};
-  return poll(&fd, 1, kTimeoutSeconds * 1000) == 1 && (fd.revents & events);
+  pollfd pfd;
+  pfd.fd = sock;
+  // poll implicitly listens for POLLERR and POLLHUP.
+  pfd.events = wait_type == WaitType::kRead ? POLLIN : POLLOUT;
+  pfd.revents = 0;
+  int ret;
+  do {
+    ret = poll(&pfd, 1, kTimeoutSeconds * 1000);
+  } while (ret < 0 && errno == EINTR);
+  return ret == 1 && pfd.revents != 0;
 #endif
 }
 
@@ -234,18 +243,7 @@
              ntohs(addr.ToIPv4().sin_port));
   }
 
-  // Using a connect BIO implicitly connects to it.
-  {
-    // Connect to it with a connect BIO.
-    UniquePtr<BIO> bio(BIO_new_connect(hostname));
-    ASSERT_TRUE(bio);
-
-    // Write a test message to the BIO. This is assumed to be smaller than the
-    // transport buffer.
-    ASSERT_EQ(static_cast<int>(sizeof(kTestMessage)),
-              BIO_write(bio.get(), kTestMessage, sizeof(kTestMessage)))
-        << LastSocketError();
-
+  auto accept_socket_and_read = [&] {
     // Accept the socket.
     OwnedSocket sock(accept(listening_sock.get(), addr.addr_mut(), &addr.len));
     ASSERT_TRUE(sock.is_valid()) << LastSocketError();
@@ -257,6 +255,20 @@
         << LastSocketError();
     EXPECT_EQ(Bytes(kTestMessage, sizeof(kTestMessage)),
               Bytes(buf, sizeof(buf)));
+  };
+
+  // Using a connect BIO implicitly connects to it.
+  {
+    // Connect to it with a connect BIO.
+    UniquePtr<BIO> bio(BIO_new_connect(hostname));
+    ASSERT_TRUE(bio);
+
+    // Write a test message to the BIO. This is assumed to be smaller than the
+    // transport buffer.
+    ASSERT_EQ(static_cast<int>(sizeof(kTestMessage)),
+              BIO_write(bio.get(), kTestMessage, sizeof(kTestMessage)))
+        << LastSocketError();
+    accept_socket_and_read();
   }
 
   // Explicitly connect to the BIO first.
@@ -268,16 +280,7 @@
     ASSERT_EQ(static_cast<int>(sizeof(kTestMessage)),
               BIO_write(bio.get(), kTestMessage, sizeof(kTestMessage)))
         << LastSocketError();
-
-    // Accept and read.
-    OwnedSocket sock(accept(listening_sock.get(), addr.addr_mut(), &addr.len));
-    ASSERT_TRUE(sock.is_valid()) << LastSocketError();
-    char buf[sizeof(kTestMessage)];
-    ASSERT_EQ(static_cast<int>(sizeof(kTestMessage)),
-              recv(sock.get(), buf, sizeof(buf), 0))
-        << LastSocketError();
-    EXPECT_EQ(Bytes(kTestMessage, sizeof(kTestMessage)),
-              Bytes(buf, sizeof(buf)));
+    accept_socket_and_read();
   }
 
   // Connect in non-blocking mode.
@@ -286,30 +289,39 @@
     ASSERT_TRUE(bio);
     ASSERT_EQ(1, BIO_set_nbio(bio.get(), 1));
 
+    // Although this is a loopback connection, this seems to consistently
+    // trigger EINPROGRESS on our supported platforms so far. Require this for
+    // now, as it is convenient for ensuring test coverage. If the test flakes
+    // on some platform, we may need to tolerate either outcome.
     ASSERT_EQ(-1, BIO_do_connect(bio.get()));
     EXPECT_TRUE(BIO_should_retry(bio.get()));
     EXPECT_TRUE(BIO_should_io_special(bio.get()));
     EXPECT_EQ(BIO_RR_CONNECT, BIO_get_retry_reason(bio.get()));
 
-    // Wait for the underlying socket to become writable and try again.
-    int fd = BIO_get_fd(bio.get(), nullptr);
-    ASSERT_GT(fd, -1);
-    ASSERT_TRUE(WaitForSocket(static_cast<Socket>(fd), WaitType::kWrite));
-    ASSERT_EQ(1, BIO_do_connect(bio.get()));
+    // Try again, without waiting on the socket, to test that we correctly pick
+    // up the new socket state. It is possible the socket is ready anyway, in
+    // which case this test run will not exercise this case, but most likely it
+    // will still be waiting.
+    int ret = BIO_do_connect(bio.get());
+    if (ret <= 0) {
+      EXPECT_EQ(ret, -1);
+      EXPECT_TRUE(BIO_should_retry(bio.get()));
+      EXPECT_TRUE(BIO_should_io_special(bio.get()));
+      EXPECT_EQ(BIO_RR_CONNECT, BIO_get_retry_reason(bio.get()));
+
+      // Wait for the underlying socket to become writable and try again.
+      int fd = BIO_get_fd(bio.get(), nullptr);
+      ASSERT_GT(fd, -1);
+      ASSERT_TRUE(WaitForSocket(static_cast<Socket>(fd), WaitType::kWrite));
+      ASSERT_EQ(1, BIO_do_connect(bio.get()));
+    } else {
+      EXPECT_EQ(ret, 1);
+    }
 
     ASSERT_EQ(static_cast<int>(sizeof(kTestMessage)),
               BIO_write(bio.get(), kTestMessage, sizeof(kTestMessage)))
         << LastSocketError();
-
-    // Accept and read.
-    OwnedSocket sock(accept(listening_sock.get(), addr.addr_mut(), &addr.len));
-    ASSERT_TRUE(sock.is_valid()) << LastSocketError();
-    char buf[sizeof(kTestMessage)];
-    ASSERT_EQ(static_cast<int>(sizeof(kTestMessage)),
-              recv(sock.get(), buf, sizeof(buf), 0))
-        << LastSocketError();
-    EXPECT_EQ(Bytes(kTestMessage, sizeof(kTestMessage)),
-              Bytes(buf, sizeof(buf)));
+    accept_socket_and_read();
   }
 
   // Implicitly connect in non-blocking mode.
@@ -318,28 +330,117 @@
     ASSERT_TRUE(bio);
     ASSERT_EQ(1, BIO_set_nbio(bio.get(), 1));
 
+    // Although this is a loopback connection, this seems to consistently
+    // trigger EINPROGRESS on our supported platforms so far. Require this for
+    // now, as it is convenient for ensuring test coverage. If the test flakes
+    // on some platform, we may need to tolerate either outcome.
     ASSERT_EQ(-1, BIO_write(bio.get(), kTestMessage, sizeof(kTestMessage)));
     EXPECT_TRUE(BIO_should_retry(bio.get()));
     EXPECT_TRUE(BIO_should_io_special(bio.get()));
     EXPECT_EQ(BIO_RR_CONNECT, BIO_get_retry_reason(bio.get()));
 
-    // Wait for the underlying socket to become writable and try again.
+    // Try again, without waiting on the socket, to test that we correctly pick
+    // up the new socket state. It is possible the socket is ready anyway, in
+    // which case this test run will not exercise this case, but most likely it
+    // will still be waiting.
+    int ret = BIO_write(bio.get(), kTestMessage, sizeof(kTestMessage));
+    if (ret <= 0) {
+      EXPECT_EQ(ret, -1);
+      EXPECT_TRUE(BIO_should_retry(bio.get()));
+      EXPECT_TRUE(BIO_should_io_special(bio.get()));
+      EXPECT_EQ(BIO_RR_CONNECT, BIO_get_retry_reason(bio.get()));
+
+      // Wait for the underlying socket to become writable and try again.
+      int fd = BIO_get_fd(bio.get(), nullptr);
+      ASSERT_GT(fd, -1);
+      ASSERT_TRUE(WaitForSocket(static_cast<Socket>(fd), WaitType::kWrite));
+      ASSERT_EQ(static_cast<int>(sizeof(kTestMessage)),
+                BIO_write(bio.get(), kTestMessage, sizeof(kTestMessage)))
+          << LastSocketError();
+    } else {
+      ASSERT_EQ(static_cast<int>(sizeof(kTestMessage)), ret);
+    }
+
+    accept_socket_and_read();
+  }
+
+  // Close the socket to test failed connects.
+  listening_sock.reset();
+
+  auto expect_connect_error = [](BIO *bio) {
+    EXPECT_FALSE(BIO_should_retry(bio));
+    EXPECT_FALSE(BIO_should_io_special(bio));
+#if defined(OPENSSL_WINDOWS)
+    EXPECT_EQ(WSAGetLastError(), WSAECONNREFUSED);
+    // TODO(crbug.com/518014953): Also check the error queue.
+#else
+    EXPECT_EQ(errno, ECONNREFUSED);
+    EXPECT_TRUE(ErrorEquals(ERR_get_error(), ERR_LIB_SYS, ECONNREFUSED));
+#endif
+    ERR_clear_error();
+  };
+
+  // |BIO_do_connect| should observe the error.
+  {
+    UniquePtr<BIO> bio(BIO_new_connect(hostname));
+    ASSERT_TRUE(bio);
+    EXPECT_EQ(-1, BIO_do_connect(bio.get()));
+    expect_connect_error(bio.get());
+  }
+
+  // Using a connect BIO implicitly connects to it, which should observe the
+  // error.
+  {
+    UniquePtr<BIO> bio(BIO_new_connect(hostname));
+    ASSERT_TRUE(bio);
+    EXPECT_EQ(-1, BIO_write(bio.get(), kTestMessage, sizeof(kTestMessage)));
+    expect_connect_error(bio.get());
+  }
+
+  // Connect in non-blocking mode.
+  {
+    UniquePtr<BIO> bio(BIO_new_connect(hostname));
+    ASSERT_TRUE(bio);
+    ASSERT_EQ(1, BIO_set_nbio(bio.get(), 1));
+
+    // Although this is a loopback connection, this seems to consistently
+    // trigger EINPROGRESS on our supported platforms so far. Require this for
+    // now, as it is convenient for ensuring test coverage. If the test flakes
+    // on some platform, we may need to tolerate either outcome.
+    ASSERT_EQ(-1, BIO_do_connect(bio.get()));
+    EXPECT_TRUE(BIO_should_retry(bio.get()));
+    EXPECT_TRUE(BIO_should_io_special(bio.get()));
+    EXPECT_EQ(BIO_RR_CONNECT, BIO_get_retry_reason(bio.get()));
+
+    // Wait for the underlying socket to observe the error and try again.
     int fd = BIO_get_fd(bio.get(), nullptr);
     ASSERT_GT(fd, -1);
     ASSERT_TRUE(WaitForSocket(static_cast<Socket>(fd), WaitType::kWrite));
-    ASSERT_EQ(static_cast<int>(sizeof(kTestMessage)),
-              BIO_write(bio.get(), kTestMessage, sizeof(kTestMessage)))
-        << LastSocketError();
+    ASSERT_EQ(-1, BIO_do_connect(bio.get()));
+    expect_connect_error(bio.get());
+  }
 
-    // Accept and read.
-    OwnedSocket sock(accept(listening_sock.get(), addr.addr_mut(), &addr.len));
-    ASSERT_TRUE(sock.is_valid()) << LastSocketError();
-    char buf[sizeof(kTestMessage)];
-    ASSERT_EQ(static_cast<int>(sizeof(kTestMessage)),
-              recv(sock.get(), buf, sizeof(buf), 0))
-        << LastSocketError();
-    EXPECT_EQ(Bytes(kTestMessage, sizeof(kTestMessage)),
-              Bytes(buf, sizeof(buf)));
+  // Implicitly connect in non-blocking mode.
+  {
+    UniquePtr<BIO> bio(BIO_new_connect(hostname));
+    ASSERT_TRUE(bio);
+    ASSERT_EQ(1, BIO_set_nbio(bio.get(), 1));
+
+    // Although this is a loopback connection, this seems to consistently
+    // trigger EINPROGRESS on our supported platforms so far. Require this for
+    // now, as it is convenient for ensuring test coverage. If the test flakes
+    // on some platform, we may need to tolerate either outcome.
+    ASSERT_EQ(-1, BIO_write(bio.get(), kTestMessage, sizeof(kTestMessage)));
+    EXPECT_TRUE(BIO_should_retry(bio.get()));
+    EXPECT_TRUE(BIO_should_io_special(bio.get()));
+    EXPECT_EQ(BIO_RR_CONNECT, BIO_get_retry_reason(bio.get()));
+
+    // Wait for the underlying socket to observe the error and try again.
+    int fd = BIO_get_fd(bio.get(), nullptr);
+    ASSERT_GT(fd, -1);
+    ASSERT_TRUE(WaitForSocket(static_cast<Socket>(fd), WaitType::kWrite));
+    ASSERT_EQ(-1, BIO_write(bio.get(), kTestMessage, sizeof(kTestMessage)));
+    expect_connect_error(bio.get());
   }
 }
 
diff --git a/crypto/bio/connect.cc b/crypto/bio/connect.cc
index 7f4a17b..328c2b1 100644
--- a/crypto/bio/connect.cc
+++ b/crypto/bio/connect.cc
@@ -209,21 +209,19 @@
         break;
 
       case BIO_CONN_S_BLOCKED_CONNECT:
-        i = bio_sock_error(FromOpaque(bio)->num);
-        if (i) {
-          if (bio_socket_should_retry(ret)) {
+        if (!bio_socket_finish_connect(FromOpaque(bio)->num)) {
+          if (bio_socket_should_retry(-1)) {
             BIO_set_retry_special(bio);
             c->state = BIO_CONN_S_BLOCKED_CONNECT;
             BIO_set_retry_reason(bio, BIO_RR_CONNECT);
-            ret = -1;
           } else {
             BIO_clear_retry_flags(bio);
             OPENSSL_PUT_SYSTEM_ERROR();
             OPENSSL_PUT_ERROR(BIO, BIO_R_NBIO_CONNECT_ERROR);
             ERR_add_error_data(4, "host=", c->param_hostname.get(), ":",
                                c->param_port.get());
-            ret = 0;
           }
+          ret = -1;
           goto exit_loop;
         } else {
           c->state = BIO_CONN_S_OK;
diff --git a/crypto/bio/internal.h b/crypto/bio/internal.h
index 19b930c..72215fc 100644
--- a/crypto/bio/internal.h
+++ b/crypto/bio/internal.h
@@ -106,8 +106,11 @@
 // TODO(fork): remove all callers of this.
 void bio_clear_socket_error();
 
-// bio_sock_error returns the last socket error on `sock`.
-int bio_sock_error(int sock);
+// bio_socket_finish_connect attempts to complete an in-progress, non-blocking
+// connect operation on `sock`. It returns one if the connect operation
+// suceeded. Otherwise, it returns zero and sets the last socket error to the
+// reason it failed.
+int bio_socket_finish_connect(int sock);
 
 // bio_socket_should_retry returns non-zero if `return_value` indicates an error
 // and the last socket error indicates that it's non-fatal.
diff --git a/crypto/bio/socket_helper.cc b/crypto/bio/socket_helper.cc
index e251b34..6c975b3 100644
--- a/crypto/bio/socket_helper.cc
+++ b/crypto/bio/socket_helper.cc
@@ -27,7 +27,9 @@
 #include <sys/types.h>
 
 #if !defined(OPENSSL_WINDOWS)
+#include <errno.h>
 #include <netdb.h>
+#include <poll.h>
 #include <unistd.h>
 #else
 #include <winsock2.h>
@@ -38,13 +40,13 @@
 #include "../internal.h"
 
 
-using namespace bssl;
+BSSL_NAMESPACE_BEGIN
 
-int bssl::bio_ip_and_port_to_socket_and_addr(int *out_sock,
-                                             struct sockaddr_storage *out_addr,
-                                             socklen_t *out_addr_length,
-                                             const char *hostname,
-                                             const char *port_str) {
+int bio_ip_and_port_to_socket_and_addr(int *out_sock,
+                                       struct sockaddr_storage *out_addr,
+                                       socklen_t *out_addr_length,
+                                       const char *hostname,
+                                       const char *port_str) {
   struct addrinfo hint, *result, *cur;
   int ret;
 
@@ -90,7 +92,7 @@
   return ret;
 }
 
-int bssl::bio_socket_nbio(int sock, int on) {
+int bio_socket_nbio(int sock, int on) {
 #if defined(OPENSSL_WINDOWS)
   u_long arg = on;
 
@@ -109,19 +111,61 @@
 #endif
 }
 
-void bssl::bio_clear_socket_error() {}
+void bio_clear_socket_error() {}
 
-int bssl::bio_sock_error(int sock) {
+int bio_socket_finish_connect(int sock) {
+  // A blocked connect signals whether it is ready based on whether it is
+  // writable. (SO_ERROR is not filled in before it is writable.)
+#if defined(OPENSSL_WINDOWS)
+  fd_set write_set, except_set;
+  FD_ZERO(&write_set);
+  FD_SET(static_cast<SOCKET>(sock), &write_set);
+  FD_ZERO(&except_set);
+  FD_SET(static_cast<SOCKET>(sock), &except_set);
+  timeval timeout = {0, 0};
+  if (select(0 /* unused on Windows */, /*readfds=*/nullptr, &write_set,
+             &except_set, &timeout) == SOCKET_ERROR) {
+    return 0;
+  }
+  if (!FD_ISSET(sock, &write_set) && !FD_ISSET(sock, &except_set)) {
+    // The connect has not completed. Set the error that |connect| would return.
+    WSASetLastError(WSAEWOULDBLOCK);
+    return 0;
+  }
+#else
+  pollfd pfd;
+  pfd.fd = sock;
+  // poll implicitly listens for POLLERR and POLLHUP.
+  pfd.events = POLLOUT;
+  pfd.revents = 0;
+  if (poll(&pfd, 1, /*timeout=*/0) < 0) {
+    return 0;
+  }
+  if (pfd.revents == 0) {
+    // The connect has not completed. Set the error that |connect| would return.
+    errno = EINPROGRESS;
+    return 0;
+  }
+#endif
+
+  // Check if the connection succeeded.
   int error;
   socklen_t error_size = sizeof(error);
-
   if (getsockopt(sock, SOL_SOCKET, SO_ERROR, (char *)&error, &error_size) < 0) {
-    return 1;
+    return 0;
   }
-  return error;
+  if (error != 0) {
+#if defined(OPENSSL_WINDOWS)
+    WSASetLastError(error);
+#else
+    errno = error;
+#endif
+    return 0;
+  }
+  return 1;
 }
 
-int bssl::bio_socket_should_retry(int return_value) {
+int bio_socket_should_retry(int return_value) {
 #if defined(OPENSSL_WINDOWS)
   return return_value == -1 && WSAGetLastError() == WSAEWOULDBLOCK;
 #else
@@ -130,4 +174,6 @@
 #endif
 }
 
+BSSL_NAMESPACE_END
+
 #endif  // OPENSSL_NO_SOCK