Use ctr32 optimizations for AES_ctr128_encrypt.

There are a decent number of direct callers of this function. I've
attached this to bug 338. Arguably this change makes that bug worse,
though it does help align the implementation on ctr32, if that pans out.

Bug: 338
Change-Id: I3dfc1305d359ec0c88d4f298fe1928bef7ec9877
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/41426
Commit-Queue: David Benjamin <davidben@google.com>
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/fipsmodule/aes/mode_wrappers.c b/crypto/fipsmodule/aes/mode_wrappers.c
index 206fcfd..d29fb27 100644
--- a/crypto/fipsmodule/aes/mode_wrappers.c
+++ b/crypto/fipsmodule/aes/mode_wrappers.c
@@ -57,7 +57,23 @@
 void AES_ctr128_encrypt(const uint8_t *in, uint8_t *out, size_t len,
                         const AES_KEY *key, uint8_t ivec[AES_BLOCK_SIZE],
                         uint8_t ecount_buf[AES_BLOCK_SIZE], unsigned int *num) {
-  CRYPTO_ctr128_encrypt(in, out, len, key, ivec, ecount_buf, num, AES_encrypt);
+  if (hwaes_capable()) {
+    CRYPTO_ctr128_encrypt_ctr32(in, out, len, key, ivec, ecount_buf, num,
+                                aes_hw_ctr32_encrypt_blocks);
+  } else if (vpaes_capable()) {
+#if defined(VPAES_CTR32)
+    // TODO(davidben): On ARM, where |BSAES| is additionally defined, this could
+    // use |vpaes_ctr32_encrypt_blocks_with_bsaes|.
+    CRYPTO_ctr128_encrypt_ctr32(in, out, len, key, ivec, ecount_buf, num,
+                                vpaes_ctr32_encrypt_blocks);
+#else
+    CRYPTO_ctr128_encrypt(in, out, len, key, ivec, ecount_buf, num,
+                          vpaes_encrypt);
+#endif
+  } else {
+    CRYPTO_ctr128_encrypt_ctr32(in, out, len, key, ivec, ecount_buf, num,
+                                aes_nohw_ctr32_encrypt_blocks);
+  }
 }
 
 void AES_ecb_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key,