Always provide a ctr32_encrypt_blocks function

There were two places where the ctr32_encrypt_blocks function could be
missing:

1. In the AES-CTR EVP_CIPHER, when using aes_nohw, we forgot to fill it
   in, even though aes_nohw provides one. We don't care enough about
   AES-CTR to benchmark it, but I expect this to be a significant
   performance win because aes_nohw is bitsliced.

2. The 32-bit x86 vpaes implementation did not have a
   ctr32_encrypt_blocks function, so just implement it in C (see the
   sketch after this list).
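
As a minimal sketch of the technique (not BoringSSL's actual code; the
real version is vpaes_ctr32_encrypt_blocks in aes.cc.inc below, which
uses internal helpers like CRYPTO_load_u32_be and CRYPTO_xor16): treat
the last four bytes of the IV as a big-endian 32-bit counter, encrypt
each successive counter block with the single-block function, and XOR
the resulting keystream into the input. The function and type names
here are illustrative only, and block128_f is simplified from the real
typedef in modes/internal.h:

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    // Hypothetical single-block encrypt function, standing in for
    // |vpaes_encrypt|. |key| points at the cipher's expanded key.
    typedef void (*block128_f)(const uint8_t in[16], uint8_t out[16],
                               const void *key);

    // Fallback ctr32_encrypt_blocks: the low 32 bits of |iv| are a
    // big-endian counter, incremented once per block (wrapping mod
    // 2^32); the upper 96 bits stay fixed.
    static void ctr32_encrypt_blocks_fallback(const uint8_t *in,
                                              uint8_t *out, size_t blocks,
                                              const void *key,
                                              const uint8_t iv[16],
                                              block128_f block) {
      uint8_t iv_buf[16], keystream[16];
      memcpy(iv_buf, iv, 12);
      uint32_t ctr = ((uint32_t)iv[12] << 24) | ((uint32_t)iv[13] << 16) |
                     ((uint32_t)iv[14] << 8) | (uint32_t)iv[15];
      for (size_t i = 0; i < blocks; i++) {
        // Write the current counter value back into the IV, big-endian.
        iv_buf[12] = (uint8_t)(ctr >> 24);
        iv_buf[13] = (uint8_t)(ctr >> 16);
        iv_buf[14] = (uint8_t)(ctr >> 8);
        iv_buf[15] = (uint8_t)ctr;
        block(iv_buf, keystream, key);
        for (int j = 0; j < 16; j++) {
          out[j] = in[j] ^ keystream[j];
        }
        ctr++;
        in += 16;
        out += 16;
      }
    }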

This allows us to delete a ton of code for a largely unreachable
codepath.

32-bit x86 is not a particularly important target in 2024, but just to
confirm, here is the impact on AES-GCM on an Intel(R) Xeon(R) Gold 6154
CPU @ 3.00GHz, running in 32-bit mode with
OPENSSL_ia32cap='~0x200000000000000' (clearing bit 57, the AES-NI
feature bit, so that the vpaes path is exercised).
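
The numbers use the output format of BoringSSL's bssl speed tool; the
exact invocation is an assumption, but something along these lines
should reproduce the setup:

    OPENSSL_ia32cap='~0x200000000000000' bssl speed -filter GCM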

Before:
Did 5587000 AES-128-GCM (16 bytes) seal operations in 1000098us (5586452.5 ops/sec): 89.4 MB/s
Did 1063000 AES-128-GCM (256 bytes) seal operations in 1000036us (1062961.7 ops/sec): 272.1 MB/s
Did 225000 AES-128-GCM (1350 bytes) seal operations in 1001725us (224612.5 ops/sec): 303.2 MB/s
Did 39000 AES-128-GCM (8192 bytes) seal operations in 1014011us (38461.1 ops/sec): 315.1 MB/s
Did 19127 AES-128-GCM (16384 bytes) seal operations in 1009808us (18941.2 ops/sec): 310.3 MB/s
Did 4742000 AES-256-GCM (16 bytes) seal operations in 1000051us (4741758.2 ops/sec): 75.9 MB/s
Did 820000 AES-256-GCM (256 bytes) seal operations in 1000199us (819836.9 ops/sec): 209.9 MB/s
Did 171000 AES-256-GCM (1350 bytes) seal operations in 1000656us (170887.9 ops/sec): 230.7 MB/s
Did 30000 AES-256-GCM (8192 bytes) seal operations in 1034187us (29008.3 ops/sec): 237.6 MB/s
Did 15000 AES-256-GCM (16384 bytes) seal operations in 1031233us (14545.7 ops/sec): 238.3 MB/s
After:
Did 5314000 AES-128-GCM (16 bytes) seal operations in 1000040us (5313787.4 ops/sec): 85.0 MB/s
Did 1035000 AES-128-GCM (256 bytes) seal operations in 1000919us (1034049.7 ops/sec): 264.7 MB/s
Did 220000 AES-128-GCM (1350 bytes) seal operations in 1001498us (219670.9 ops/sec): 296.6 MB/s
Did 37000 AES-128-GCM (8192 bytes) seal operations in 1009332us (36657.9 ops/sec): 300.3 MB/s
Did 19000 AES-128-GCM (16384 bytes) seal operations in 1018435us (18656.1 ops/sec): 305.7 MB/s
Did 4512250 AES-256-GCM (16 bytes) seal operations in 1000047us (4512037.9 ops/sec): 72.2 MB/s
Did 803000 AES-256-GCM (256 bytes) seal operations in 1000917us (802264.3 ops/sec): 205.4 MB/s
Did 168000 AES-256-GCM (1350 bytes) seal operations in 1002181us (167634.4 ops/sec): 226.3 MB/s
Did 29000 AES-256-GCM (8192 bytes) seal operations in 1033945us (28047.9 ops/sec): 229.8 MB/s
Did 15000 AES-256-GCM (16384 bytes) seal operations in 1055312us (14213.8 ops/sec): 232.9 MB/s

It's a bit slower, but comparable. Any affected device should really be
served ChaCha20-Poly1305 anyway. (Of course, this is entirely
unscientific because this code would never run on anything remotely like
this CPU.)

Fixed: 383994657
Change-Id: Ifca6a6195b20497dc7053d736c61b9000d3074f9
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/74267
Reviewed-by: Adam Langley <agl@google.com>
Commit-Queue: David Benjamin <davidben@google.com>
diff --git a/crypto/cipher_extra/e_aesctrhmac.cc b/crypto/cipher_extra/e_aesctrhmac.cc
index f977a89..be94d73 100644
--- a/crypto/cipher_extra/e_aesctrhmac.cc
+++ b/crypto/cipher_extra/e_aesctrhmac.cc
@@ -154,7 +154,7 @@
     const struct aead_aes_ctr_hmac_sha256_ctx *aes_ctx, uint8_t *out,
     const uint8_t *in, size_t len, const uint8_t *nonce) {
   // Since the AEAD operation is one-shot, keeping a buffer of unused keystream
-  // bytes is pointless. However, |CRYPTO_ctr128_encrypt| requires it.
+  // bytes is pointless. However, |CRYPTO_ctr128_encrypt_ctr32| requires it.
   uint8_t partial_block_buffer[AES_BLOCK_SIZE];
   unsigned partial_block_offset = 0;
   OPENSSL_memset(partial_block_buffer, 0, sizeof(partial_block_buffer));
@@ -163,15 +163,9 @@
   OPENSSL_memcpy(counter, nonce, EVP_AEAD_AES_CTR_HMAC_SHA256_NONCE_LEN);
   OPENSSL_memset(counter + EVP_AEAD_AES_CTR_HMAC_SHA256_NONCE_LEN, 0, 4);
 
-  if (aes_ctx->ctr) {
-    CRYPTO_ctr128_encrypt_ctr32(in, out, len, &aes_ctx->ks.ks, counter,
-                                partial_block_buffer, &partial_block_offset,
-                                aes_ctx->ctr);
-  } else {
-    CRYPTO_ctr128_encrypt(in, out, len, &aes_ctx->ks.ks, counter,
-                          partial_block_buffer, &partial_block_offset,
-                          aes_ctx->block);
-  }
+  CRYPTO_ctr128_encrypt_ctr32(in, out, len, &aes_ctx->ks.ks, counter,
+                              partial_block_buffer, &partial_block_offset,
+                              aes_ctx->ctr);
 }
 
 static int aead_aes_ctr_hmac_sha256_seal_scatter(
diff --git a/crypto/fipsmodule/aes/aes.cc.inc b/crypto/fipsmodule/aes/aes.cc.inc
index 7eab5ac..05f7a2b 100644
--- a/crypto/fipsmodule/aes/aes.cc.inc
+++ b/crypto/fipsmodule/aes/aes.cc.inc
@@ -125,3 +125,22 @@
   }
 }
 #endif
+
+#if defined(VPAES) && defined(OPENSSL_X86)
+// On x86, there is no |vpaes_ctr32_encrypt_blocks|, so we implement it
+// ourselves. This avoids all callers needing to account for a missing function.
+void vpaes_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out, size_t blocks,
+                                const AES_KEY *key, const uint8_t iv[16]) {
+  uint32_t ctr = CRYPTO_load_u32_be(iv + 12);
+  uint8_t iv_buf[16], enc[16];
+  OPENSSL_memcpy(iv_buf, iv, 12);
+  for (size_t i = 0; i < blocks; i++) {
+    CRYPTO_store_u32_be(iv_buf + 12, ctr);
+    vpaes_encrypt(iv_buf, enc, key);
+    CRYPTO_xor16(out, in, enc);
+    ctr++;
+    in += 16;
+    out += 16;
+  }
+}
+#endif
diff --git a/crypto/fipsmodule/aes/aes_test.cc b/crypto/fipsmodule/aes/aes_test.cc
index 75d0520..2024acd 100644
--- a/crypto/fipsmodule/aes/aes_test.cc
+++ b/crypto/fipsmodule/aes/aes_test.cc
@@ -316,9 +316,7 @@
         CHECK_ABI(vpaes_cbc_encrypt, buf, buf, AES_BLOCK_SIZE * blocks, &key,
                   block, AES_ENCRYPT);
 #endif
-#if defined(VPAES_CTR32)
         CHECK_ABI(vpaes_ctr32_encrypt_blocks, buf, buf, blocks, &key, block);
-#endif
       }
 
       ASSERT_EQ(CHECK_ABI(vpaes_set_decrypt_key, kKey, bits, &key), 0);
diff --git a/crypto/fipsmodule/aes/internal.h b/crypto/fipsmodule/aes/internal.h
index 170dcc6..7730b00 100644
--- a/crypto/fipsmodule/aes/internal.h
+++ b/crypto/fipsmodule/aes/internal.h
@@ -33,9 +33,6 @@
 OPENSSL_INLINE int hwaes_capable(void) { return CRYPTO_is_AESNI_capable(); }
 
 #define VPAES
-#if defined(OPENSSL_X86_64)
-#define VPAES_CTR32
-#endif
 #define VPAES_CBC
 OPENSSL_INLINE int vpaes_capable(void) { return CRYPTO_is_SSSE3_capable(); }
 
@@ -47,7 +44,6 @@
 #if defined(OPENSSL_ARM)
 #define BSAES
 #define VPAES
-#define VPAES_CTR32
 OPENSSL_INLINE int bsaes_capable(void) { return CRYPTO_is_NEON_capable(); }
 OPENSSL_INLINE int vpaes_capable(void) { return CRYPTO_is_NEON_capable(); }
 #endif
@@ -55,7 +51,6 @@
 #if defined(OPENSSL_AARCH64)
 #define VPAES
 #define VPAES_CBC
-#define VPAES_CTR32
 OPENSSL_INLINE int vpaes_capable(void) { return CRYPTO_is_NEON_capable(); }
 #endif
 
@@ -199,10 +194,8 @@
 void vpaes_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t length,
                        const AES_KEY *key, uint8_t *ivec, int enc);
 #endif
-#if defined(VPAES_CTR32)
 void vpaes_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out, size_t len,
                                 const AES_KEY *key, const uint8_t ivec[16]);
-#endif
 #else
 OPENSSL_INLINE char vpaes_capable(void) { return 0; }
 
@@ -229,6 +222,11 @@
                                       uint8_t *ivec, int enc) {
   abort();
 }
+OPENSSL_INLINE void vpaes_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out,
+                                               size_t len, const AES_KEY *key,
+                                               const uint8_t ivec[16]) {
+  abort();
+}
 #endif  // !VPAES
 
 
diff --git a/crypto/fipsmodule/aes/mode_wrappers.cc.inc b/crypto/fipsmodule/aes/mode_wrappers.cc.inc
index 10d98a6..aa76867 100644
--- a/crypto/fipsmodule/aes/mode_wrappers.cc.inc
+++ b/crypto/fipsmodule/aes/mode_wrappers.cc.inc
@@ -62,15 +62,10 @@
     CRYPTO_ctr128_encrypt_ctr32(in, out, len, key, ivec, ecount_buf, num,
                                 aes_hw_ctr32_encrypt_blocks);
   } else if (vpaes_capable()) {
-#if defined(VPAES_CTR32)
     // TODO(davidben): On ARM, where |BSAES| is additionally defined, this could
     // use |vpaes_ctr32_encrypt_blocks_with_bsaes|.
     CRYPTO_ctr128_encrypt_ctr32(in, out, len, key, ivec, ecount_buf, num,
                                 vpaes_ctr32_encrypt_blocks);
-#else
-    CRYPTO_ctr128_encrypt(in, out, len, key, ivec, ecount_buf, num,
-                          vpaes_encrypt);
-#endif
   } else {
     CRYPTO_ctr128_encrypt_ctr32(in, out, len, key, ivec, ecount_buf, num,
                                 aes_nohw_ctr32_encrypt_blocks);
diff --git a/crypto/fipsmodule/cipher/e_aes.cc.inc b/crypto/fipsmodule/cipher/e_aes.cc.inc
index 9774268..30957f0 100644
--- a/crypto/fipsmodule/cipher/e_aes.cc.inc
+++ b/crypto/fipsmodule/cipher/e_aes.cc.inc
@@ -211,7 +211,7 @@
 #if defined(BSAES)
       assert(bsaes_capable());
       dat->stream.ctr = vpaes_ctr32_encrypt_blocks_with_bsaes;
-#elif defined(VPAES_CTR32)
+#else
       dat->stream.ctr = vpaes_ctr32_encrypt_blocks;
 #endif
     }
@@ -221,6 +221,8 @@
     dat->stream.cbc = NULL;
     if (mode == EVP_CIPH_CBC_MODE) {
       dat->stream.cbc = aes_nohw_cbc_encrypt;
+    } else if (mode == EVP_CIPH_CTR_MODE) {
+      dat->stream.ctr = aes_nohw_ctr32_encrypt_blocks;
     }
   }
 
@@ -267,14 +269,8 @@
 static int aes_ctr_cipher(EVP_CIPHER_CTX *ctx, uint8_t *out, const uint8_t *in,
                           size_t len) {
   EVP_AES_KEY *dat = (EVP_AES_KEY *)ctx->cipher_data;
-
-  if (dat->stream.ctr) {
-    CRYPTO_ctr128_encrypt_ctr32(in, out, len, &dat->ks.ks, ctx->iv, ctx->buf,
-                                &ctx->num, dat->stream.ctr);
-  } else {
-    CRYPTO_ctr128_encrypt(in, out, len, &dat->ks.ks, ctx->iv, ctx->buf,
-                          &ctx->num, dat->block);
-  }
+  CRYPTO_ctr128_encrypt_ctr32(in, out, len, &dat->ks.ks, ctx->iv, ctx->buf,
+                              &ctx->num, dat->stream.ctr);
   return 1;
 }
 
@@ -314,10 +310,8 @@
 #if defined(BSAES)
     assert(bsaes_capable());
     return vpaes_ctr32_encrypt_blocks_with_bsaes;
-#elif defined(VPAES_CTR32)
-    return vpaes_ctr32_encrypt_blocks;
 #else
-    return NULL;
+    return vpaes_ctr32_encrypt_blocks;
 #endif
   }
 
@@ -556,26 +550,14 @@
         return -1;
       }
     } else if (ctx->encrypt) {
-      if (gctx->ctr) {
-        if (!CRYPTO_gcm128_encrypt_ctr32(&gctx->gcm, &gctx->ks.ks, in, out, len,
-                                         gctx->ctr)) {
-          return -1;
-        }
-      } else {
-        if (!CRYPTO_gcm128_encrypt(&gctx->gcm, &gctx->ks.ks, in, out, len)) {
-          return -1;
-        }
+      if (!CRYPTO_gcm128_encrypt_ctr32(&gctx->gcm, &gctx->ks.ks, in, out, len,
+                                       gctx->ctr)) {
+        return -1;
       }
     } else {
-      if (gctx->ctr) {
-        if (!CRYPTO_gcm128_decrypt_ctr32(&gctx->gcm, &gctx->ks.ks, in, out, len,
-                                         gctx->ctr)) {
-          return -1;
-        }
-      } else {
-        if (!CRYPTO_gcm128_decrypt(&gctx->gcm, &gctx->ks.ks, in, out, len)) {
-          return -1;
-        }
+      if (!CRYPTO_gcm128_decrypt_ctr32(&gctx->gcm, &gctx->ks.ks, in, out, len,
+                                       gctx->ctr)) {
+        return -1;
       }
     }
     return (int)len;
@@ -973,28 +955,14 @@
     return 0;
   }
 
-  if (gcm_ctx->ctr) {
-    if (!CRYPTO_gcm128_encrypt_ctr32(&gcm, key, in, out, in_len,
-                                     gcm_ctx->ctr)) {
-      return 0;
-    }
-  } else {
-    if (!CRYPTO_gcm128_encrypt(&gcm, key, in, out, in_len)) {
-      return 0;
-    }
+  if (!CRYPTO_gcm128_encrypt_ctr32(&gcm, key, in, out, in_len, gcm_ctx->ctr)) {
+    return 0;
   }
 
-  if (extra_in_len) {
-    if (gcm_ctx->ctr) {
-      if (!CRYPTO_gcm128_encrypt_ctr32(&gcm, key, extra_in, out_tag,
-                                       extra_in_len, gcm_ctx->ctr)) {
-        return 0;
-      }
-    } else {
-      if (!CRYPTO_gcm128_encrypt(&gcm, key, extra_in, out_tag, extra_in_len)) {
-        return 0;
-      }
-    }
+  if (extra_in_len > 0 &&
+      !CRYPTO_gcm128_encrypt_ctr32(&gcm, key, extra_in, out_tag, extra_in_len,
+                                   gcm_ctx->ctr)) {
+    return 0;
   }
 
   CRYPTO_gcm128_tag(&gcm, out_tag + extra_in_len, tag_len);
@@ -1044,15 +1012,8 @@
     return 0;
   }
 
-  if (gcm_ctx->ctr) {
-    if (!CRYPTO_gcm128_decrypt_ctr32(&gcm, key, in, out, in_len,
-                                     gcm_ctx->ctr)) {
-      return 0;
-    }
-  } else {
-    if (!CRYPTO_gcm128_decrypt(&gcm, key, in, out, in_len)) {
-      return 0;
-    }
+  if (!CRYPTO_gcm128_decrypt_ctr32(&gcm, key, in, out, in_len, gcm_ctx->ctr)) {
+    return 0;
   }
 
   CRYPTO_gcm128_tag(&gcm, tag, tag_len);
diff --git a/crypto/fipsmodule/cipher/e_aesccm.cc.inc b/crypto/fipsmodule/cipher/e_aesccm.cc.inc
index b20690e..e1682b6 100644
--- a/crypto/fipsmodule/cipher/e_aesccm.cc.inc
+++ b/crypto/fipsmodule/cipher/e_aesccm.cc.inc
@@ -188,13 +188,8 @@
 
   uint8_t partial_buf[16];
   unsigned num = 0;
-  if (ctx->ctr != NULL) {
-    CRYPTO_ctr128_encrypt_ctr32(in, out, len, key, state->nonce, partial_buf,
-                                &num, ctx->ctr);
-  } else {
-    CRYPTO_ctr128_encrypt(in, out, len, key, state->nonce, partial_buf, &num,
-                          ctx->block);
-  }
+  CRYPTO_ctr128_encrypt_ctr32(in, out, len, key, state->nonce, partial_buf,
+                              &num, ctx->ctr);
   return 1;
 }
 
diff --git a/crypto/fipsmodule/cipher/internal.h b/crypto/fipsmodule/cipher/internal.h
index af902e0..e9709f0 100644
--- a/crypto/fipsmodule/cipher/internal.h
+++ b/crypto/fipsmodule/cipher/internal.h
@@ -152,7 +152,7 @@
 // where |key_bytes| must either be 16, 24 or 32. If not NULL, |*out_block| is
 // set to a function that encrypts single blocks. If not NULL, |*gcm_key| is
 // initialised to do GHASH with the given key. It returns a function for
-// optimised CTR-mode, or NULL if CTR-mode should be built using |*out_block|.
+// optimised CTR-mode.
 ctr128_f aes_ctr_set_key(AES_KEY *aes_key, GCM128_KEY *gcm_key,
                          block128_f *out_block, const uint8_t *key,
                          size_t key_bytes);
diff --git a/crypto/fipsmodule/modes/ctr.cc.inc b/crypto/fipsmodule/modes/ctr.cc.inc
index 8c333bb..3eae832 100644
--- a/crypto/fipsmodule/modes/ctr.cc.inc
+++ b/crypto/fipsmodule/modes/ctr.cc.inc
@@ -53,71 +53,9 @@
 #include "../../internal.h"
 
 
-// NOTE: the IV/counter CTR mode is big-endian.  The code itself
-// is endian-neutral.
-
-// increment counter (128-bit int) by 1
-static void ctr128_inc(uint8_t *counter) {
-  uint32_t n = 16, c = 1;
-
-  do {
-    --n;
-    c += counter[n];
-    counter[n] = (uint8_t) c;
-    c >>= 8;
-  } while (n);
-}
-
 static_assert(16 % sizeof(crypto_word_t) == 0,
               "block cannot be divided into crypto_word_t");
 
-// The input encrypted as though 128bit counter mode is being used.  The extra
-// state information to record how much of the 128bit block we have used is
-// contained in *num, and the encrypted counter is kept in ecount_buf.  Both
-// *num and ecount_buf must be initialised with zeros before the first call to
-// CRYPTO_ctr128_encrypt().
-//
-// This algorithm assumes that the counter is in the x lower bits of the IV
-// (ivec), and that the application has full control over overflow and the rest
-// of the IV.  This implementation takes NO responsibility for checking that
-// the counter doesn't overflow into the rest of the IV when incremented.
-void CRYPTO_ctr128_encrypt(const uint8_t *in, uint8_t *out, size_t len,
-                           const AES_KEY *key, uint8_t ivec[16],
-                           uint8_t ecount_buf[16], unsigned int *num,
-                           block128_f block) {
-  unsigned int n;
-
-  assert(key && ecount_buf && num);
-  assert(len == 0 || (in && out));
-  assert(*num < 16);
-
-  n = *num;
-
-  while (n && len) {
-    *(out++) = *(in++) ^ ecount_buf[n];
-    --len;
-    n = (n + 1) % 16;
-  }
-  while (len >= 16) {
-    (*block)(ivec, ecount_buf, key);
-    ctr128_inc(ivec);
-    CRYPTO_xor16(out, in, ecount_buf);
-    len -= 16;
-    out += 16;
-    in += 16;
-    n = 0;
-  }
-  if (len) {
-    (*block)(ivec, ecount_buf, key);
-    ctr128_inc(ivec);
-    while (len--) {
-      out[n] = in[n] ^ ecount_buf[n];
-      ++n;
-    }
-  }
-  *num = n;
-}
-
 // increment upper 96 bits of 128-bit counter by 1
 static void ctr96_inc(uint8_t *counter) {
   uint32_t n = 12, c = 1;
diff --git a/crypto/fipsmodule/modes/gcm.cc.inc b/crypto/fipsmodule/modes/gcm.cc.inc
index 2d67eea..cbd1858 100644
--- a/crypto/fipsmodule/modes/gcm.cc.inc
+++ b/crypto/fipsmodule/modes/gcm.cc.inc
@@ -406,173 +406,6 @@
   return 1;
 }
 
-int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, const AES_KEY *key,
-                          const uint8_t *in, uint8_t *out, size_t len) {
-  block128_f block = ctx->gcm_key.block;
-#ifdef GCM_FUNCREF
-  void (*gcm_gmult_p)(uint8_t Xi[16], const u128 Htable[16]) =
-      ctx->gcm_key.gmult;
-  void (*gcm_ghash_p)(uint8_t Xi[16], const u128 Htable[16], const uint8_t *inp,
-                      size_t len) = ctx->gcm_key.ghash;
-#endif
-
-  uint64_t mlen = ctx->len.msg + len;
-  if (mlen > ((UINT64_C(1) << 36) - 32) ||
-      (sizeof(len) == 8 && mlen < len)) {
-    return 0;
-  }
-  ctx->len.msg = mlen;
-
-  if (ctx->ares) {
-    // First call to encrypt finalizes GHASH(AAD)
-    GCM_MUL(ctx, Xi);
-    ctx->ares = 0;
-  }
-
-  unsigned n = ctx->mres;
-  if (n) {
-    while (n && len) {
-      ctx->Xi[n] ^= *(out++) = *(in++) ^ ctx->EKi[n];
-      --len;
-      n = (n + 1) % 16;
-    }
-    if (n == 0) {
-      GCM_MUL(ctx, Xi);
-    } else {
-      ctx->mres = n;
-      return 1;
-    }
-  }
-
-  uint32_t ctr = CRYPTO_load_u32_be(ctx->Yi + 12);
-  while (len >= GHASH_CHUNK) {
-    size_t j = GHASH_CHUNK;
-
-    while (j) {
-      (*block)(ctx->Yi, ctx->EKi, key);
-      ++ctr;
-      CRYPTO_store_u32_be(ctx->Yi + 12, ctr);
-      CRYPTO_xor16(out, in, ctx->EKi);
-      out += 16;
-      in += 16;
-      j -= 16;
-    }
-    GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
-    len -= GHASH_CHUNK;
-  }
-  size_t len_blocks = len & kSizeTWithoutLower4Bits;
-  if (len_blocks != 0) {
-    while (len >= 16) {
-      (*block)(ctx->Yi, ctx->EKi, key);
-      ++ctr;
-      CRYPTO_store_u32_be(ctx->Yi + 12, ctr);
-      CRYPTO_xor16(out, in, ctx->EKi);
-      out += 16;
-      in += 16;
-      len -= 16;
-    }
-    GHASH(ctx, out - len_blocks, len_blocks);
-  }
-  if (len) {
-    (*block)(ctx->Yi, ctx->EKi, key);
-    ++ctr;
-    CRYPTO_store_u32_be(ctx->Yi + 12, ctr);
-    while (len--) {
-      ctx->Xi[n] ^= out[n] = in[n] ^ ctx->EKi[n];
-      ++n;
-    }
-  }
-
-  ctx->mres = n;
-  return 1;
-}
-
-int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, const AES_KEY *key,
-                          const unsigned char *in, unsigned char *out,
-                          size_t len) {
-  block128_f block = ctx->gcm_key.block;
-#ifdef GCM_FUNCREF
-  void (*gcm_gmult_p)(uint8_t Xi[16], const u128 Htable[16]) =
-      ctx->gcm_key.gmult;
-  void (*gcm_ghash_p)(uint8_t Xi[16], const u128 Htable[16], const uint8_t *inp,
-                      size_t len) = ctx->gcm_key.ghash;
-#endif
-
-  uint64_t mlen = ctx->len.msg + len;
-  if (mlen > ((UINT64_C(1) << 36) - 32) ||
-      (sizeof(len) == 8 && mlen < len)) {
-    return 0;
-  }
-  ctx->len.msg = mlen;
-
-  if (ctx->ares) {
-    // First call to decrypt finalizes GHASH(AAD)
-    GCM_MUL(ctx, Xi);
-    ctx->ares = 0;
-  }
-
-  unsigned n = ctx->mres;
-  if (n) {
-    while (n && len) {
-      uint8_t c = *(in++);
-      *(out++) = c ^ ctx->EKi[n];
-      ctx->Xi[n] ^= c;
-      --len;
-      n = (n + 1) % 16;
-    }
-    if (n == 0) {
-      GCM_MUL(ctx, Xi);
-    } else {
-      ctx->mres = n;
-      return 1;
-    }
-  }
-
-  uint32_t ctr = CRYPTO_load_u32_be(ctx->Yi + 12);
-  while (len >= GHASH_CHUNK) {
-    size_t j = GHASH_CHUNK;
-
-    GHASH(ctx, in, GHASH_CHUNK);
-    while (j) {
-      (*block)(ctx->Yi, ctx->EKi, key);
-      ++ctr;
-      CRYPTO_store_u32_be(ctx->Yi + 12, ctr);
-      CRYPTO_xor16(out, in, ctx->EKi);
-      out += 16;
-      in += 16;
-      j -= 16;
-    }
-    len -= GHASH_CHUNK;
-  }
-  size_t len_blocks = len & kSizeTWithoutLower4Bits;
-  if (len_blocks != 0) {
-    GHASH(ctx, in, len_blocks);
-    while (len >= 16) {
-      (*block)(ctx->Yi, ctx->EKi, key);
-      ++ctr;
-      CRYPTO_store_u32_be(ctx->Yi + 12, ctr);
-      CRYPTO_xor16(out, in, ctx->EKi);
-      out += 16;
-      in += 16;
-      len -= 16;
-    }
-  }
-  if (len) {
-    (*block)(ctx->Yi, ctx->EKi, key);
-    ++ctr;
-    CRYPTO_store_u32_be(ctx->Yi + 12, ctr);
-    while (len--) {
-      uint8_t c = in[n];
-      ctx->Xi[n] ^= c;
-      out[n] = c ^ ctx->EKi[n];
-      ++n;
-    }
-  }
-
-  ctx->mres = n;
-  return 1;
-}
-
 int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, const AES_KEY *key,
                                 const uint8_t *in, uint8_t *out, size_t len,
                                 ctr128_f stream) {
diff --git a/crypto/fipsmodule/modes/internal.h b/crypto/fipsmodule/modes/internal.h
index 4cedc39..263b314 100644
--- a/crypto/fipsmodule/modes/internal.h
+++ b/crypto/fipsmodule/modes/internal.h
@@ -97,21 +97,14 @@
 typedef void (*ctr128_f)(const uint8_t *in, uint8_t *out, size_t blocks,
                          const AES_KEY *key, const uint8_t ivec[16]);
 
-// CRYPTO_ctr128_encrypt encrypts (or decrypts, it's the same in CTR mode)
+// CRYPTO_ctr128_encrypt_ctr32 encrypts (or decrypts, it's the same in CTR mode)
 // |len| bytes from |in| to |out| using |block| in counter mode. There's no
 // requirement that |len| be a multiple of any value and any partial blocks are
 // stored in |ecount_buf| and |*num|, which must be zeroed before the initial
 // call. The counter is a 128-bit, big-endian value in |ivec| and is
-// incremented by this function.
-void CRYPTO_ctr128_encrypt(const uint8_t *in, uint8_t *out, size_t len,
-                           const AES_KEY *key, uint8_t ivec[16],
-                           uint8_t ecount_buf[16], unsigned *num,
-                           block128_f block);
-
-// CRYPTO_ctr128_encrypt_ctr32 acts like |CRYPTO_ctr128_encrypt| but takes
-// |ctr|, a function that performs CTR mode but only deals with the lower 32
-// bits of the counter. This is useful when |ctr| can be an optimised
-// function.
+// incremented by this function. If the counter overflows, it wraps around.
+// |ctr| must be a function that performs CTR mode but only deals with the lower
+// 32 bits of the counter.
 void CRYPTO_ctr128_encrypt_ctr32(const uint8_t *in, uint8_t *out, size_t len,
                                  const AES_KEY *key, uint8_t ivec[16],
                                  uint8_t ecount_buf[16], unsigned *num,
@@ -207,18 +200,6 @@
 // and zero otherwise.
 int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const uint8_t *aad, size_t len);
 
-// CRYPTO_gcm128_encrypt encrypts |len| bytes from |in| to |out|. The |key|
-// must be the same key that was passed to |CRYPTO_gcm128_init|. It returns one
-// on success and zero otherwise.
-int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, const AES_KEY *key,
-                          const uint8_t *in, uint8_t *out, size_t len);
-
-// CRYPTO_gcm128_decrypt decrypts |len| bytes from |in| to |out|. The |key|
-// must be the same key that was passed to |CRYPTO_gcm128_init|. It returns one
-// on success and zero otherwise.
-int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, const AES_KEY *key,
-                          const uint8_t *in, uint8_t *out, size_t len);
-
 // CRYPTO_gcm128_encrypt_ctr32 encrypts |len| bytes from |in| to |out| using
 // a CTR function that only handles the bottom 32 bits of the nonce, like
 // |CRYPTO_ctr128_encrypt_ctr32|. The |key| must be the same key that was
diff --git a/crypto/fipsmodule/rand/ctrdrbg.cc.inc b/crypto/fipsmodule/rand/ctrdrbg.cc.inc
index 239a2ec..c60eb8d 100644
--- a/crypto/fipsmodule/rand/ctrdrbg.cc.inc
+++ b/crypto/fipsmodule/rand/ctrdrbg.cc.inc
@@ -181,17 +181,10 @@
     todo &= ~(AES_BLOCK_SIZE - 1);
     const size_t num_blocks = todo / AES_BLOCK_SIZE;
 
-    if (drbg->ctr) {
-      OPENSSL_memset(out, 0, todo);
-      ctr32_add(drbg, 1);
-      drbg->ctr(out, out, num_blocks, &drbg->ks, drbg->counter);
-      ctr32_add(drbg, (uint32_t)(num_blocks - 1));
-    } else {
-      for (size_t i = 0; i < todo; i += AES_BLOCK_SIZE) {
-        ctr32_add(drbg, 1);
-        drbg->block(drbg->counter, out + i, &drbg->ks);
-      }
-    }
+    OPENSSL_memset(out, 0, todo);
+    ctr32_add(drbg, 1);
+    drbg->ctr(out, out, num_blocks, &drbg->ks, drbg->counter);
+    ctr32_add(drbg, (uint32_t)(num_blocks - 1));
 
     out += todo;
     out_len -= todo;