Allocate small TLS read buffers inline.

Our TLS read pattern is always read(5); read(record_size); read(5);
read(record_size); and so on. Allocate the 5-byte reads inline in SSLBuffer.
This avoids bouncing on a 5-byte malloc to learn a socket is idle and
avoids calling malloc twice on each record.

This costs a few bytes but means we malloc once per record, rather than
twice per record + once each time the state machine is run while idle.

Change-Id: I4f6dafe4141cbb890b921a5fa8d528c1fb98a0b4
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/39004
Commit-Queue: David Benjamin <davidben@google.com>
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/ssl/internal.h b/ssl/internal.h
index 41135e3..7ef810e 100644
--- a/ssl/internal.h
+++ b/ssl/internal.h
@@ -1239,6 +1239,11 @@
   uint16_t size_ = 0;
   // cap_ is how much memory beyond |buf_| + |offset_| is available.
   uint16_t cap_ = 0;
+  // inline_buf_ is a fixed-size buffer, stored inline, for short reads.
+  uint8_t inline_buf_[SSL3_RT_HEADER_LENGTH];
+  // buf_allocated_ is true if |buf_| points to allocated data and must be freed
+  // or false if it points into |inline_buf_|.
+  bool buf_allocated_ = false;
 };
 
 // ssl_read_buffer_extend_to extends the read buffer to the desired length. For
diff --git a/ssl/ssl_buffer.cc b/ssl/ssl_buffer.cc
index 49ecf90..d73055f 100644
--- a/ssl/ssl_buffer.cc
+++ b/ssl/ssl_buffer.cc
@@ -37,8 +37,11 @@
               "SSL3_ALIGN_PAYLOAD must be a power of 2");
 
 void SSLBuffer::Clear() {
-  free(buf_);  // Allocated with malloc().
+  if (buf_allocated_) {
+    free(buf_);  // Allocated with malloc().
+  }
   buf_ = nullptr;
+  buf_allocated_ = false;
   offset_ = 0;
   size_ = 0;
   cap_ = 0;
@@ -54,27 +57,43 @@
     return true;
   }
 
-  // Add up to |SSL3_ALIGN_PAYLOAD| - 1 bytes of slack for alignment.
-  //
-  // Since this buffer gets allocated quite frequently and doesn't contain any
-  // sensitive data, we allocate with malloc rather than |OPENSSL_malloc| and
-  // avoid zeroing on free.
-  uint8_t *new_buf = (uint8_t *)malloc(new_cap + SSL3_ALIGN_PAYLOAD - 1);
-  if (new_buf == NULL) {
-    OPENSSL_PUT_ERROR(SSL, ERR_R_MALLOC_FAILURE);
-    return false;
+  uint8_t *new_buf;
+  bool new_buf_allocated;
+  size_t new_offset;
+  if (new_cap <= sizeof(inline_buf_)) {
+    // This function is called twice per TLS record, first for the five-byte
+    // header. To avoid allocating twice, use an inline buffer for short inputs.
+    new_buf = inline_buf_;
+    new_buf_allocated = false;
+    new_offset = 0;
+  } else {
+    // Add up to |SSL3_ALIGN_PAYLOAD| - 1 bytes of slack for alignment.
+    //
+    // Since this buffer gets allocated quite frequently and doesn't contain any
+    // sensitive data, we allocate with malloc rather than |OPENSSL_malloc| and
+    // avoid zeroing on free.
+    new_buf = (uint8_t *)malloc(new_cap + SSL3_ALIGN_PAYLOAD - 1);
+    if (new_buf == NULL) {
+      OPENSSL_PUT_ERROR(SSL, ERR_R_MALLOC_FAILURE);
+      return false;
+    }
+    new_buf_allocated = true;
+
+    // Offset the buffer such that the record body is aligned.
+    new_offset =
+        (0 - header_len - (uintptr_t)new_buf) & (SSL3_ALIGN_PAYLOAD - 1);
   }
 
-  // Offset the buffer such that the record body is aligned.
-  size_t new_offset =
-      (0 - header_len - (uintptr_t)new_buf) & (SSL3_ALIGN_PAYLOAD - 1);
+  // Note that if both the old and new buffers are inline, the source and
+  // destination may alias.
+  OPENSSL_memmove(new_buf + new_offset, buf_ + offset_, size_);
 
-  if (buf_ != NULL) {
-    OPENSSL_memcpy(new_buf + new_offset, buf_ + offset_, size_);
+  if (buf_allocated_) {
     free(buf_);  // Allocated with malloc().
   }
 
   buf_ = new_buf;
+  buf_allocated_ = new_buf_allocated;
   offset_ = new_offset;
   cap_ = new_cap;
   return true;