EVP_AEAD: implement sealv/openv for AES-GCM-SIV.

Performance note: this seems to cost 16 to 54 CPU cycles per call with
LTO, and 33 to 77 CPU cycles without; no measurable per-byte overhead
though.

Bug: 383343306
Change-Id: I93a25996014610782c088719183fdf6b40776f67
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/84127
Commit-Queue: Rudolf Polzer <rpolzer@google.com>
Reviewed-by: David Benjamin <davidben@google.com>
Auto-Submit: Rudolf Polzer <rpolzer@google.com>
diff --git a/crypto/cipher/e_aesgcmsiv.cc b/crypto/cipher/e_aesgcmsiv.cc
index c0ac927..89322b0 100644
--- a/crypto/cipher/e_aesgcmsiv.cc
+++ b/crypto/cipher/e_aesgcmsiv.cc
@@ -19,6 +19,7 @@
 #include <openssl/cipher.h>
 #include <openssl/crypto.h>
 #include <openssl/err.h>
+#include <openssl/span.h>
 
 #include "../fipsmodule/aes/internal.h"
 #include "../fipsmodule/cipher/internal.h"
@@ -29,6 +30,10 @@
 #define EVP_AEAD_AES_GCM_SIV_TAG_LEN 16
 
 namespace {
+void inc_counter(uint8_t tag[16], uint32_t by) {
+  CRYPTO_store_u32_le(tag, CRYPTO_load_u32_le(tag) + by);
+}
+
 // TODO(davidben): AES-GCM-SIV assembly is not correct for Windows. It must save
 // and restore xmm6 through xmm15.
 #if defined(OPENSSL_X86_64) && !defined(OPENSSL_NO_ASM) && \
@@ -223,11 +228,14 @@
 
 // gcm_siv_asm_polyval evaluates POLYVAL at |auth_key| on the given plaintext
 // and AD. The result is written to |out_tag|.
-void gcm_siv_asm_polyval(uint8_t out_tag[16], const uint8_t *in, size_t in_len,
-                         const uint8_t *ad, size_t ad_len,
+void gcm_siv_asm_polyval(uint8_t out_tag[16],
+                         bssl::Span<const CRYPTO_IOVEC> iovecs,
+                         bssl::Span<const CRYPTO_IVEC> aadvecs,
                          const uint8_t auth_key[16], const uint8_t nonce[12]) {
   OPENSSL_memset(out_tag, 0, 16);
+  const size_t ad_len = bssl::iovec::TotalLength(aadvecs);
   const size_t ad_blocks = ad_len / 16;
+  const size_t in_len = bssl::iovec::TotalLength(iovecs);
   const size_t in_blocks = in_len / 16;
   int htable_init = 0;
   alignas(16) uint8_t htable[16 * 8];
@@ -237,30 +245,36 @@
     aesgcmsiv_htable_init(htable, auth_key);
   }
 
-  if (htable_init) {
-    aesgcmsiv_htable_polyval(htable, ad, ad_len & ~15, out_tag);
-  } else {
-    aesgcmsiv_polyval_horner(out_tag, auth_key, ad, ad_blocks);
-  }
+  auto f_whole = [&](const uint8_t *in, size_t len) {
+    if (htable_init) {
+      aesgcmsiv_htable_polyval(htable, in, len, out_tag);
+    } else {
+      aesgcmsiv_polyval_horner(out_tag, auth_key, in, len / AES_BLOCK_SIZE);
+    }
+    return true;
+  };
 
-  uint8_t scratch[16];
-  if (ad_len & 15) {
-    OPENSSL_memset(scratch, 0, sizeof(scratch));
-    OPENSSL_memcpy(scratch, &ad[ad_len & ~15], ad_len & 15);
-    aesgcmsiv_polyval_horner(out_tag, auth_key, scratch, 1);
-  }
+  auto f_final = [&](const uint8_t *in, size_t len) {
+    size_t len_whole = (len / AES_BLOCK_SIZE) * AES_BLOCK_SIZE;
+    if (len_whole != 0) {
+      f_whole(in, len_whole);
+      in += len_whole;
+      len -= len_whole;
+    }
+    if (len != 0) {
+      uint8_t pad_buf[AES_BLOCK_SIZE];
+      OPENSSL_memcpy(pad_buf, in, len);
+      OPENSSL_memset(pad_buf + len, 0, AES_BLOCK_SIZE - len);
+      aesgcmsiv_polyval_horner(out_tag, auth_key, pad_buf, 1);
+    }
+    return true;
+  };
 
-  if (htable_init) {
-    aesgcmsiv_htable_polyval(htable, in, in_len & ~15, out_tag);
-  } else {
-    aesgcmsiv_polyval_horner(out_tag, auth_key, in, in_blocks);
-  }
+  bssl::iovec::ForEachBlockRange<AES_BLOCK_SIZE, /*WriteOut=*/false>(
+      aadvecs, f_whole, f_final);
 
-  if (in_len & 15) {
-    OPENSSL_memset(scratch, 0, sizeof(scratch));
-    OPENSSL_memcpy(scratch, &in[in_len & ~15], in_len & 15);
-    aesgcmsiv_polyval_horner(out_tag, auth_key, scratch, 1);
-  }
+  bssl::iovec::ForEachBlockRange<AES_BLOCK_SIZE, /*WriteOut=*/false>(
+      iovecs, f_whole, f_final);
 
   uint8_t length_block[16];
   CRYPTO_store_u64_le(length_block, ad_len * 8);
@@ -276,16 +290,16 @@
 
 // aead_aes_gcm_siv_asm_crypt_last_block handles the encryption/decryption
 // (same thing in CTR mode) of the final block of a plaintext/ciphertext. It
-// writes |in_len| & 15 bytes to |out| + |in_len|, based on an initial counter
-// derived from |tag|.
+// writes |total_in_len| & 15 bytes to |out_last_block|, based on an initial
+// counter derived from |tag|.
 void aead_aes_gcm_siv_asm_crypt_last_block(
-    int is_128_bit, uint8_t *out, const uint8_t *in, size_t in_len,
-    const uint8_t tag[16],
+    int is_128_bit, uint8_t *out_last_block, const uint8_t *in_last_block,
+    size_t total_in_len, const uint8_t tag[16],
     const struct aead_aes_gcm_siv_asm_ctx *enc_key_expanded) {
   alignas(16) uint8_t counter[16];
   OPENSSL_memcpy(&counter, tag, sizeof(counter));
   counter[15] |= 0x80;
-  CRYPTO_store_u32_le(counter, CRYPTO_load_u32_le(counter) + in_len / 16);
+  inc_counter(counter, static_cast<uint32_t>(total_in_len / 16));
 
   if (is_128_bit) {
     aes128gcmsiv_ecb_enc_block(counter, counter, enc_key_expanded);
@@ -293,12 +307,9 @@
     aes256gcmsiv_ecb_enc_block(counter, counter, enc_key_expanded);
   }
 
-  const size_t last_bytes_offset = in_len & ~15;
-  const size_t last_bytes_len = in_len & 15;
-  uint8_t *last_bytes_out = &out[last_bytes_offset];
-  const uint8_t *last_bytes_in = &in[last_bytes_offset];
+  const size_t last_bytes_len = total_in_len & 15;
   for (size_t i = 0; i < last_bytes_len; i++) {
-    last_bytes_out[i] = last_bytes_in[i] ^ counter[i];
+    out_last_block[i] = in_last_block[i] ^ counter[i];
   }
 }
 
@@ -329,13 +340,16 @@
   out_record_auth_key[1] = key_material[2];
 }
 
-int aead_aes_gcm_siv_asm_seal_scatter(
-    const EVP_AEAD_CTX *ctx, uint8_t *out, uint8_t *out_tag,
-    size_t *out_tag_len, size_t max_out_tag_len, const uint8_t *nonce,
-    size_t nonce_len, const uint8_t *in, size_t in_len, const uint8_t *extra_in,
-    size_t extra_in_len, const uint8_t *ad, size_t ad_len) {
+int aead_aes_gcm_siv_asm_sealv(const EVP_AEAD_CTX *ctx,
+                               bssl::Span<const CRYPTO_IOVEC> iovecs,
+                               uint8_t *out_tag, size_t *out_tag_len,
+                               size_t max_out_tag_len, const uint8_t *nonce,
+                               size_t nonce_len,
+                               bssl::Span<const CRYPTO_IVEC> aadvecs) {
   const struct aead_aes_gcm_siv_asm_ctx *gcm_siv_ctx = asm_ctx_from_ctx(ctx);
+  const size_t in_len = bssl::iovec::TotalLength(iovecs);
   const uint64_t in_len_64 = in_len;
+  const size_t ad_len = bssl::iovec::TotalLength(aadvecs);
   const uint64_t ad_len_64 = ad_len;
 
   if (in_len_64 > (UINT64_C(1) << 36) || ad_len_64 >= (UINT64_C(1) << 61)) {
@@ -359,34 +373,89 @@
                        record_enc_key, nonce);
 
   alignas(16) uint8_t tag[16] = {0};
-  gcm_siv_asm_polyval(tag, in, in_len, ad, ad_len,
-                      (const uint8_t *)record_auth_key, nonce);
+  gcm_siv_asm_polyval(tag, iovecs, aadvecs, (const uint8_t *)record_auth_key,
+                      nonce);
 
   struct aead_aes_gcm_siv_asm_ctx enc_key_expanded;
 
   if (gcm_siv_ctx->is_128_bit) {
     aes128gcmsiv_aes_ks_enc_x1(tag, tag, &enc_key_expanded.key[0],
                                record_enc_key);
-
-    if (in_len < 128) {
-      aes128gcmsiv_enc_msg_x4(in, out, tag, &enc_key_expanded, in_len & ~15);
-    } else {
-      aes128gcmsiv_enc_msg_x8(in, out, tag, &enc_key_expanded, in_len & ~15);
-    }
+    // Maintain a counter across calls to assembly. The functions internally set
+    // the MSB of the last byte, so we only need to update the counter.
+    alignas(16) uint8_t counter[16];
+    OPENSSL_memcpy(counter, tag, 16);
+    bssl::iovec::ForEachBlockRange<AES_BLOCK_SIZE, /*WriteOut=*/true>(
+        iovecs,
+        [&](const uint8_t *in, uint8_t *out, size_t len) {
+          if (len >= 128) {
+            aes128gcmsiv_enc_msg_x8(in, out, counter, &enc_key_expanded, len);
+          } else {
+            aes128gcmsiv_enc_msg_x4(in, out, counter, &enc_key_expanded, len);
+          }
+          inc_counter(counter, static_cast<uint32_t>(len / AES_BLOCK_SIZE));
+          return true;
+        },
+        [&](const uint8_t *in, uint8_t *out, size_t len) {
+          size_t len_whole = (len / AES_BLOCK_SIZE) * AES_BLOCK_SIZE;
+          if (len_whole != 0) {
+            if (len_whole >= 128) {
+              aes128gcmsiv_enc_msg_x8(in, out, counter, &enc_key_expanded,
+                                      len_whole);
+            } else {
+              aes128gcmsiv_enc_msg_x4(in, out, counter, &enc_key_expanded,
+                                      len_whole);
+            }
+            in += len_whole;
+            out += len_whole;
+            len -= len_whole;
+          }
+          if (len != 0) {
+            aead_aes_gcm_siv_asm_crypt_last_block(
+                /*is_128_bit=*/true, /*out_last_block=*/out,
+                /*in_last_block=*/in, /*total_in_len=*/in_len, tag,
+                &enc_key_expanded);
+          }
+          return true;
+        });
   } else {
     aes256gcmsiv_aes_ks_enc_x1(tag, tag, &enc_key_expanded.key[0],
                                record_enc_key);
-
-    if (in_len < 128) {
-      aes256gcmsiv_enc_msg_x4(in, out, tag, &enc_key_expanded, in_len & ~15);
-    } else {
-      aes256gcmsiv_enc_msg_x8(in, out, tag, &enc_key_expanded, in_len & ~15);
-    }
-  }
-
-  if (in_len & 15) {
-    aead_aes_gcm_siv_asm_crypt_last_block(gcm_siv_ctx->is_128_bit, out, in,
-                                          in_len, tag, &enc_key_expanded);
+    alignas(16) uint8_t counter[16];
+    OPENSSL_memcpy(counter, tag, 16);
+    bssl::iovec::ForEachBlockRange<AES_BLOCK_SIZE, /*WriteOut=*/true>(
+        iovecs,
+        [&](const uint8_t *in, uint8_t *out, size_t len) {
+          if (len >= 128) {
+            aes256gcmsiv_enc_msg_x8(in, out, counter, &enc_key_expanded, len);
+          } else {
+            aes256gcmsiv_enc_msg_x4(in, out, counter, &enc_key_expanded, len);
+          }
+          inc_counter(counter, static_cast<uint32_t>(len / AES_BLOCK_SIZE));
+          return true;
+        },
+        [&](const uint8_t *in, uint8_t *out, size_t len) {
+          size_t len_whole = (len / AES_BLOCK_SIZE) * AES_BLOCK_SIZE;
+          if (len_whole != 0) {
+            if (len_whole >= 128) {
+              aes256gcmsiv_enc_msg_x8(in, out, counter, &enc_key_expanded,
+                                      len_whole);
+            } else {
+              aes256gcmsiv_enc_msg_x4(in, out, counter, &enc_key_expanded,
+                                      len_whole);
+            }
+            in += len_whole;
+            out += len_whole;
+            len -= len_whole;
+          }
+          if (len != 0) {
+            aead_aes_gcm_siv_asm_crypt_last_block(
+                /*is_128_bit=*/false, /*out_last_block=*/out,
+                /*in_last_block=*/in, /*total_in_len=*/in_len, tag,
+                &enc_key_expanded);
+          }
+          return true;
+        });
   }
 
   OPENSSL_memcpy(out_tag, tag, sizeof(tag));
@@ -395,17 +464,20 @@
   return 1;
 }
 
-int aead_aes_gcm_siv_asm_open_gather(const EVP_AEAD_CTX *ctx, uint8_t *out,
-                                     const uint8_t *nonce, size_t nonce_len,
-                                     const uint8_t *in, size_t in_len,
-                                     const uint8_t *in_tag, size_t in_tag_len,
-                                     const uint8_t *ad, size_t ad_len) {
+int aead_aes_gcm_siv_asm_openv_detached(const EVP_AEAD_CTX *ctx,
+                                        bssl::Span<const CRYPTO_IOVEC> iovecs,
+                                        const uint8_t *nonce, size_t nonce_len,
+                                        const uint8_t *in_tag,
+                                        size_t in_tag_len,
+                                        bssl::Span<const CRYPTO_IVEC> aadvecs) {
+  const size_t ad_len = bssl::iovec::TotalLength(aadvecs);
   const uint64_t ad_len_64 = ad_len;
   if (ad_len_64 >= (UINT64_C(1) << 61)) {
     OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_TOO_LARGE);
     return 0;
   }
 
+  const size_t in_len = bssl::iovec::TotalLength(iovecs);
   const uint64_t in_len_64 = in_len;
   if (in_len_64 > UINT64_C(1) << 36 ||
       in_tag_len != EVP_AEAD_AES_GCM_SIV_TAG_LEN) {
@@ -436,38 +508,87 @@
   alignas(16) uint8_t calculated_tag[16 * 8] = {0};
 
   OPENSSL_memset(calculated_tag, 0, EVP_AEAD_AES_GCM_SIV_TAG_LEN);
-  const size_t ad_blocks = ad_len / 16;
-  aesgcmsiv_polyval_horner(calculated_tag, (const uint8_t *)record_auth_key, ad,
-                           ad_blocks);
-
-  uint8_t scratch[16];
-  if (ad_len & 15) {
-    OPENSSL_memset(scratch, 0, sizeof(scratch));
-    OPENSSL_memcpy(scratch, &ad[ad_len & ~15], ad_len & 15);
-    aesgcmsiv_polyval_horner(calculated_tag, (const uint8_t *)record_auth_key,
-                             scratch, 1);
-  }
+  bssl::iovec::ForEachBlockRange<AES_BLOCK_SIZE, /*WriteOut=*/false>(
+      aadvecs,
+      [&](const uint8_t *in, size_t len) {
+        aesgcmsiv_polyval_horner(calculated_tag,
+                                 (const uint8_t *)record_auth_key, in,
+                                 len / AES_BLOCK_SIZE);
+        return true;
+      },
+      [&](const uint8_t *in, size_t len) {
+        size_t len_whole = (len / AES_BLOCK_SIZE) * AES_BLOCK_SIZE;
+        if (len_whole != 0) {
+          aesgcmsiv_polyval_horner(calculated_tag,
+                                   (const uint8_t *)record_auth_key, in,
+                                   len_whole / AES_BLOCK_SIZE);
+          in += len_whole;
+          len -= len_whole;
+        }
+        if (len != 0) {
+          uint8_t pad_buf[AES_BLOCK_SIZE];
+          OPENSSL_memcpy(pad_buf, in, len);
+          OPENSSL_memset(pad_buf + len, 0, AES_BLOCK_SIZE - len);
+          aesgcmsiv_polyval_horner(
+              calculated_tag, (const uint8_t *)record_auth_key, pad_buf, 1);
+        }
+        return true;
+      });
 
   alignas(16) uint8_t htable[16 * 6];
   aesgcmsiv_htable6_init(htable, (const uint8_t *)record_auth_key);
 
-  // aes[128|256]gcmsiv_dec needs access to the claimed tag. So it's put into
-  // its scratch space.
-  memcpy(calculated_tag + 16, in_tag, EVP_AEAD_AES_GCM_SIV_TAG_LEN);
-  if (gcm_siv_ctx->is_128_bit) {
-    aes128gcmsiv_dec(in, out, calculated_tag, htable, &expanded_key, in_len);
-  } else {
-    aes256gcmsiv_dec(in, out, calculated_tag, htable, &expanded_key, in_len);
-  }
-
-  if (in_len & 15) {
-    aead_aes_gcm_siv_asm_crypt_last_block(gcm_siv_ctx->is_128_bit, out, in,
-                                          in_len, in_tag, &expanded_key);
-    OPENSSL_memset(scratch, 0, sizeof(scratch));
-    OPENSSL_memcpy(scratch, out + (in_len & ~15), in_len & 15);
-    aesgcmsiv_polyval_horner(calculated_tag, (const uint8_t *)record_auth_key,
-                             scratch, 1);
-  }
+  size_t blocks = 0;
+  bssl::iovec::ForEachBlockRange<AES_BLOCK_SIZE, /*WriteOut=*/true>(
+      iovecs,
+      [&](const uint8_t *in, uint8_t *out, size_t len) {
+        // aes[128|256]gcmsiv_dec needs access to the claimed tag. So it's put
+        // into its scratch space. The function may clobber the claimed tag, so
+        // this is copied before each call.
+        OPENSSL_memcpy(calculated_tag + 16, in_tag,
+                       EVP_AEAD_AES_GCM_SIV_TAG_LEN);
+        inc_counter(calculated_tag + 16, static_cast<uint32_t>(blocks));
+        if (gcm_siv_ctx->is_128_bit) {
+          aes128gcmsiv_dec(in, out, calculated_tag, htable, &expanded_key, len);
+        } else {
+          aes256gcmsiv_dec(in, out, calculated_tag, htable, &expanded_key, len);
+        }
+        blocks += len / AES_BLOCK_SIZE;
+        return true;
+      },
+      [&](const uint8_t *in, uint8_t *out, size_t len) {
+        size_t len_whole = (len / AES_BLOCK_SIZE) * AES_BLOCK_SIZE;
+        if (len_whole != 0) {
+          // aes[128|256]gcmsiv_dec needs access to the claimed tag. So it's put
+          // into its scratch space. The function may clobber the claimed tag,
+          // so this is copied before each call.
+          OPENSSL_memcpy(calculated_tag + 16, in_tag,
+                         EVP_AEAD_AES_GCM_SIV_TAG_LEN);
+          inc_counter(calculated_tag + 16, static_cast<uint32_t>(blocks));
+          if (gcm_siv_ctx->is_128_bit) {
+            aes128gcmsiv_dec(in, out, calculated_tag, htable, &expanded_key,
+                             len_whole);
+          } else {
+            aes256gcmsiv_dec(in, out, calculated_tag, htable, &expanded_key,
+                             len_whole);
+          }
+          in += len_whole;
+          out += len_whole;
+          len -= len_whole;
+        }
+        if (len != 0) {
+          aead_aes_gcm_siv_asm_crypt_last_block(
+              gcm_siv_ctx->is_128_bit, /*out_last_block=*/out,
+              /*in_last_block=*/in, /*total_in_len=*/in_len, in_tag,
+              &expanded_key);
+          uint8_t pad_buf[AES_BLOCK_SIZE];
+          OPENSSL_memcpy(pad_buf, out, len);
+          OPENSSL_memset(pad_buf + len, 0, AES_BLOCK_SIZE - len);
+          aesgcmsiv_polyval_horner(
+              calculated_tag, (const uint8_t *)record_auth_key, pad_buf, 1);
+        }
+        return true;
+      });
 
   uint8_t length_block[16];
   CRYPTO_store_u64_le(length_block, ad_len * 8);
@@ -507,11 +628,11 @@
     nullptr /* init_with_direction */,
     aead_aes_gcm_siv_asm_cleanup,
     nullptr /* open */,
-    aead_aes_gcm_siv_asm_seal_scatter,
-    aead_aes_gcm_siv_asm_open_gather,
+    nullptr /* seal_scatter */,
+    nullptr /* open_gather */,
     nullptr /* openv */,
-    nullptr /* sealv */,
-    nullptr /* openv_detached */,
+    aead_aes_gcm_siv_asm_sealv,
+    aead_aes_gcm_siv_asm_openv_detached,
     nullptr /* get_iv */,
     nullptr /* tag_len */,
 };
@@ -527,11 +648,11 @@
     nullptr /* init_with_direction */,
     aead_aes_gcm_siv_asm_cleanup,
     nullptr /* open */,
-    aead_aes_gcm_siv_asm_seal_scatter,
-    aead_aes_gcm_siv_asm_open_gather,
+    nullptr /* seal_scatter */,
+    nullptr /* open_gather */,
     nullptr /* openv */,
-    nullptr /* sealv */,
-    nullptr /* openv_detached */,
+    aead_aes_gcm_siv_asm_sealv,
+    aead_aes_gcm_siv_asm_openv_detached,
     nullptr /* get_iv */,
     nullptr /* tag_len */,
 };
@@ -586,14 +707,14 @@
 
 void aead_aes_gcm_siv_cleanup(EVP_AEAD_CTX *ctx) {}
 
-// gcm_siv_crypt encrypts (or decrypts—it's the same thing) |in_len| bytes from
-// |in| to |out|, using the block function |enc_block| with |key| in counter
-// mode, starting at |initial_counter|. This differs from the traditional
-// counter mode code in that the counter is handled little-endian, only the
-// first four bytes are used and the GCM-SIV tweak to the final byte is
-// applied. The |in| and |out| pointers may be equal but otherwise must not
+// gcm_siv_crypt encrypts (or decrypts—it's the same thing) each |iovecs|
+// entry from |in| to |out|, using the block function |enc_block| with |key| in
+// counter mode, starting at |initial_counter|. This differs from the
+// traditional counter mode code in that the counter is handled little-endian,
+// only the first four bytes are used and the GCM-SIV tweak to the final byte
+// is applied. The |in| and |out| pointers may be equal but otherwise must not
 // alias.
-void gcm_siv_crypt(uint8_t *out, const uint8_t *in, size_t in_len,
+void gcm_siv_crypt(bssl::Span<const CRYPTO_IOVEC> iovecs,
                    const uint8_t initial_counter[AES_BLOCK_SIZE],
                    block128_f enc_block, const AES_KEY *key) {
   uint8_t counter[16];
@@ -601,22 +722,28 @@
   OPENSSL_memcpy(counter, initial_counter, AES_BLOCK_SIZE);
   counter[15] |= 0x80;
 
-  for (size_t done = 0; done < in_len;) {
-    uint8_t keystream[AES_BLOCK_SIZE];
-    enc_block(counter, keystream, key);
-    CRYPTO_store_u32_le(counter, CRYPTO_load_u32_le(counter) + 1);
+  auto crypt_bytes = [&](const uint8_t *in, uint8_t *out, size_t len) {
+    for (size_t done = 0; done < len;) {
+      uint8_t keystream[AES_BLOCK_SIZE];
+      enc_block(counter, keystream, key);
+      inc_counter(counter, 1);
 
-    size_t todo = AES_BLOCK_SIZE;
-    if (in_len - done < todo) {
-      todo = in_len - done;
+      size_t todo = AES_BLOCK_SIZE;
+      if (len - done < todo) {
+        todo = len - done;
+      }
+
+      for (size_t i = 0; i < todo; i++) {
+        out[done + i] = keystream[i] ^ in[done + i];
+      }
+
+      done += todo;
     }
+    return true;
+  };
 
-    for (size_t i = 0; i < todo; i++) {
-      out[done + i] = keystream[i] ^ in[done + i];
-    }
-
-    done += todo;
-  }
+  bssl::iovec::ForEachBlockRange<AES_BLOCK_SIZE, /*WriteOut=*/true>(
+      iovecs, crypt_bytes, crypt_bytes);
 }
 
 
@@ -702,32 +829,48 @@
 
 // gcm_siv_polyval evaluates POLYVAL at |auth_key| on the given plaintext and
 // AD. The result is written to |out_tag|.
-void gcm_siv_polyval(uint8_t out_tag[16], const uint8_t *in, size_t in_len,
-                     const uint8_t *ad, size_t ad_len,
+void gcm_siv_polyval(uint8_t out_tag[16], bssl::Span<const CRYPTO_IOVEC> iovecs,
+                     bool encrypt, bssl::Span<const CRYPTO_IVEC> aadvecs,
                      const uint8_t auth_key[16],
                      const uint8_t nonce[EVP_AEAD_AES_GCM_SIV_NONCE_LEN]) {
   struct polyval_ctx polyval_ctx;
   crypto_polyval_init(&polyval_ctx, auth_key);
 
-  crypto_polyval_update_blocks(&polyval_ctx, ad, ad_len & ~15);
+  auto f_whole = [&](const uint8_t *in, size_t len) {
+    crypto_polyval_update_blocks(&polyval_ctx, in, len);
+    return true;
+  };
+  auto f_final = [&](const uint8_t *in, size_t len) {
+    size_t len_whole = (len / AES_BLOCK_SIZE) * AES_BLOCK_SIZE;
+    if (len_whole != 0) {
+      crypto_polyval_update_blocks(&polyval_ctx, in, len_whole);
+      in += len_whole;
+      len -= len_whole;
+    }
+    if (len != 0) {
+      uint8_t pad_buf[AES_BLOCK_SIZE];
+      OPENSSL_memcpy(pad_buf, in, len);
+      OPENSSL_memset(pad_buf + len, 0, AES_BLOCK_SIZE - len);
+      crypto_polyval_update_blocks(&polyval_ctx, pad_buf, AES_BLOCK_SIZE);
+    }
+    return true;
+  };
 
-  uint8_t scratch[16];
-  if (ad_len & 15) {
-    OPENSSL_memset(scratch, 0, sizeof(scratch));
-    OPENSSL_memcpy(scratch, &ad[ad_len & ~15], ad_len & 15);
-    crypto_polyval_update_blocks(&polyval_ctx, scratch, sizeof(scratch));
-  }
+  bssl::iovec::ForEachBlockRange<AES_BLOCK_SIZE, /*WriteOut=*/false>(
+      aadvecs, f_whole, f_final);
 
-  crypto_polyval_update_blocks(&polyval_ctx, in, in_len & ~15);
-  if (in_len & 15) {
-    OPENSSL_memset(scratch, 0, sizeof(scratch));
-    OPENSSL_memcpy(scratch, &in[in_len & ~15], in_len & 15);
-    crypto_polyval_update_blocks(&polyval_ctx, scratch, sizeof(scratch));
+  if (encrypt) {
+    bssl::iovec::ForEachBlockRange<AES_BLOCK_SIZE, /*WriteOut=*/false>(
+        iovecs, f_whole, f_final);
+  } else {
+    bssl::iovec::ForEachOutBlockRange<AES_BLOCK_SIZE>(iovecs, f_whole, f_final);
   }
 
   uint8_t length_block[16];
-  CRYPTO_store_u64_le(length_block, ((uint64_t)ad_len) * 8);
-  CRYPTO_store_u64_le(length_block + 8, ((uint64_t)in_len) * 8);
+  CRYPTO_store_u64_le(length_block,
+                      ((uint64_t)bssl::iovec::TotalLength(aadvecs)) * 8);
+  CRYPTO_store_u64_le(length_block + 8,
+                      ((uint64_t)bssl::iovec::TotalLength(iovecs)) * 8);
   crypto_polyval_update_blocks(&polyval_ctx, length_block,
                                sizeof(length_block));
 
@@ -782,15 +925,16 @@
                   key_material + 16, gcm_siv_ctx->is_256 ? 32 : 16);
 }
 
-int aead_aes_gcm_siv_seal_scatter(const EVP_AEAD_CTX *ctx, uint8_t *out,
-                                  uint8_t *out_tag, size_t *out_tag_len,
-                                  size_t max_out_tag_len, const uint8_t *nonce,
-                                  size_t nonce_len, const uint8_t *in,
-                                  size_t in_len, const uint8_t *extra_in,
-                                  size_t extra_in_len, const uint8_t *ad,
-                                  size_t ad_len) {
+int aead_aes_gcm_siv_sealv(const EVP_AEAD_CTX *ctx,
+                           bssl::Span<const CRYPTO_IOVEC> iovecs,
+                           uint8_t *out_tag, size_t *out_tag_len,
+                           size_t max_out_tag_len, const uint8_t *nonce,
+                           size_t nonce_len,
+                           bssl::Span<const CRYPTO_IVEC> aadvecs) {
   const struct aead_aes_gcm_siv_ctx *gcm_siv_ctx =
       (struct aead_aes_gcm_siv_ctx *)&ctx->state;
+  size_t in_len = bssl::iovec::TotalLength(iovecs);
+  size_t ad_len = bssl::iovec::TotalLength(aadvecs);
   const uint64_t in_len_64 = in_len;
   const uint64_t ad_len_64 = ad_len;
 
@@ -814,10 +958,10 @@
   gcm_siv_keys(gcm_siv_ctx, &keys, nonce);
 
   uint8_t tag[16];
-  gcm_siv_polyval(tag, in, in_len, ad, ad_len, keys.auth_key, nonce);
+  gcm_siv_polyval(tag, iovecs, /*encrypt=*/true, aadvecs, keys.auth_key, nonce);
   keys.enc_block(tag, tag, &keys.enc_key.ks);
 
-  gcm_siv_crypt(out, in, in_len, tag, keys.enc_block, &keys.enc_key.ks);
+  gcm_siv_crypt(iovecs, tag, keys.enc_block, &keys.enc_key.ks);
 
   OPENSSL_memcpy(out_tag, tag, EVP_AEAD_AES_GCM_SIV_TAG_LEN);
   *out_tag_len = EVP_AEAD_AES_GCM_SIV_TAG_LEN;
@@ -825,18 +969,18 @@
   return 1;
 }
 
-int aead_aes_gcm_siv_open_gather(const EVP_AEAD_CTX *ctx, uint8_t *out,
-                                 const uint8_t *nonce, size_t nonce_len,
-                                 const uint8_t *in, size_t in_len,
-                                 const uint8_t *in_tag, size_t in_tag_len,
-                                 const uint8_t *ad, size_t ad_len) {
-  const uint64_t ad_len_64 = ad_len;
+int aead_aes_gcm_siv_openv_detached(const EVP_AEAD_CTX *ctx,
+                                    bssl::Span<const CRYPTO_IOVEC> iovecs,
+                                    const uint8_t *nonce, size_t nonce_len,
+                                    const uint8_t *in_tag, size_t in_tag_len,
+                                    bssl::Span<const CRYPTO_IVEC> aadvecs) {
+  const uint64_t ad_len_64 = bssl::iovec::TotalLength(aadvecs);
   if (ad_len_64 >= (UINT64_C(1) << 61)) {
     OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_TOO_LARGE);
     return 0;
   }
 
-  const uint64_t in_len_64 = in_len;
+  const uint64_t in_len_64 = bssl::iovec::TotalLength(iovecs);
   if (in_tag_len != EVP_AEAD_AES_GCM_SIV_TAG_LEN ||
       in_len_64 > (UINT64_C(1) << 36) + AES_BLOCK_SIZE) {
     OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_BAD_DECRYPT);
@@ -854,10 +998,11 @@
   struct gcm_siv_record_keys keys;
   gcm_siv_keys(gcm_siv_ctx, &keys, nonce);
 
-  gcm_siv_crypt(out, in, in_len, in_tag, keys.enc_block, &keys.enc_key.ks);
+  gcm_siv_crypt(iovecs, in_tag, keys.enc_block, &keys.enc_key.ks);
 
   uint8_t expected_tag[EVP_AEAD_AES_GCM_SIV_TAG_LEN];
-  gcm_siv_polyval(expected_tag, out, in_len, ad, ad_len, keys.auth_key, nonce);
+  gcm_siv_polyval(expected_tag, iovecs, /*encrypt=*/false, aadvecs,
+                  keys.auth_key, nonce);
   keys.enc_block(expected_tag, expected_tag, &keys.enc_key.ks);
 
   if (CRYPTO_memcmp(expected_tag, in_tag, sizeof(expected_tag)) != 0) {
@@ -879,11 +1023,11 @@
     nullptr /* init_with_direction */,
     aead_aes_gcm_siv_cleanup,
     nullptr /* open */,
-    aead_aes_gcm_siv_seal_scatter,
-    aead_aes_gcm_siv_open_gather,
+    nullptr /* seal_scatter */,
+    nullptr /* open_gather */,
     nullptr /* openv */,
-    nullptr /* sealv */,
-    nullptr /* openv_detached */,
+    aead_aes_gcm_siv_sealv,
+    aead_aes_gcm_siv_openv_detached,
     nullptr /* get_iv */,
     nullptr /* tag_len */,
 };
@@ -899,11 +1043,11 @@
     nullptr /* init_with_direction */,
     aead_aes_gcm_siv_cleanup,
     nullptr /* open */,
-    aead_aes_gcm_siv_seal_scatter,
-    aead_aes_gcm_siv_open_gather,
+    nullptr /* seal_scatter */,
+    nullptr /* open_gather */,
     nullptr /* openv */,
-    nullptr /* sealv */,
-    nullptr /* openv_detached */,
+    aead_aes_gcm_siv_sealv,
+    aead_aes_gcm_siv_openv_detached,
     nullptr /* get_iv */,
     nullptr /* tag_len */,
 };