Revert "Revert "Revert "Make x86(-64) use the same aes_hw_* infrastructure as POWER and the ARMs."""

gcm.c's AES-NI code wasn't triggering. (Thanks Brian for noting.)

Change-Id: Ic740e498b94fece180ac35c449066aee1349cbd5
Reviewed-on: https://boringssl-review.googlesource.com/28424
Reviewed-by: Adam Langley <alangley@gmail.com>
Commit-Queue: Adam Langley <agl@google.com>
CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
diff --git a/crypto/fipsmodule/aes/aes.c b/crypto/fipsmodule/aes/aes.c
index fa92a81..43e2e16 100644
--- a/crypto/fipsmodule/aes/aes.c
+++ b/crypto/fipsmodule/aes/aes.c
@@ -810,14 +810,6 @@
 // control asm symbol visibility with command line flags and such so they are
 // always hidden and wrapped by these C functions, which can be so
 // controlled.
-//
-// Be aware that on x86(-64), the asm_AES_* functions are incompatible with the
-// aes_hw_* functions. The latter set |AES_KEY.rounds| to one less than the true
-// value, which breaks the former. Therefore the two functions cannot mix.
-//
-// On AArch64, we don't have asm_AES_* functions and so must use the generic
-// versions when hardware support isn't provided. However, the Aarch64 assembly
-// doesn't have the same compatibility problem.
 
 void asm_AES_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
 void AES_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key) {
diff --git a/crypto/fipsmodule/aes/asm/aesni-x86_64.pl b/crypto/fipsmodule/aes/asm/aesni-x86_64.pl
index 6545274..a186941 100644
--- a/crypto/fipsmodule/aes/asm/aesni-x86_64.pl
+++ b/crypto/fipsmodule/aes/asm/aesni-x86_64.pl
@@ -188,7 +188,7 @@
 #	incurred by operations on %xmm8-15. As ECB is not considered
 #	critical, nothing was done to mitigate the problem.
 
-$PREFIX="aes_hw";	# if $PREFIX is set to "AES", the script
+$PREFIX="aesni";	# if $PREFIX is set to "AES", the script
 			# generates drop-in replacement for
 			# crypto/aes/asm/aes-x86_64.pl:-)
 
@@ -206,7 +206,7 @@
 open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
 *STDOUT=*OUT;
 
-$movkey = $PREFIX eq "aes_hw" ? "movups" : "movups";
+$movkey = $PREFIX eq "aesni" ? "movups" : "movups";
 @_4args=$win64?	("%rcx","%rdx","%r8", "%r9") :	# Win64 order
 		("%rdi","%rsi","%rdx","%rcx");	# Unix order
 
@@ -577,27 +577,27 @@
 .size	_aesni_${dir}rypt8,.-_aesni_${dir}rypt8
 ___
 }
-&aesni_generate2("enc") if ($PREFIX eq "aes_hw");
+&aesni_generate2("enc") if ($PREFIX eq "aesni");
 &aesni_generate2("dec");
-&aesni_generate3("enc") if ($PREFIX eq "aes_hw");
+&aesni_generate3("enc") if ($PREFIX eq "aesni");
 &aesni_generate3("dec");
-&aesni_generate4("enc") if ($PREFIX eq "aes_hw");
+&aesni_generate4("enc") if ($PREFIX eq "aesni");
 &aesni_generate4("dec");
-&aesni_generate6("enc") if ($PREFIX eq "aes_hw");
+&aesni_generate6("enc") if ($PREFIX eq "aesni");
 &aesni_generate6("dec");
-&aesni_generate8("enc") if ($PREFIX eq "aes_hw");
+&aesni_generate8("enc") if ($PREFIX eq "aesni");
 &aesni_generate8("dec");
 
-if ($PREFIX eq "aes_hw") {
+if ($PREFIX eq "aesni") {
 ########################################################################
 # void aesni_ecb_encrypt (const void *in, void *out,
 #			  size_t length, const AES_KEY *key,
 #			  int enc);
 $code.=<<___;
-.globl	${PREFIX}_ecb_encrypt
-.type	${PREFIX}_ecb_encrypt,\@function,5
+.globl	aesni_ecb_encrypt
+.type	aesni_ecb_encrypt,\@function,5
 .align	16
-${PREFIX}_ecb_encrypt:
+aesni_ecb_encrypt:
 ___
 $code.=<<___ if ($win64);
 	lea	-0x58(%rsp),%rsp
@@ -943,7 +943,7 @@
 ___
 $code.=<<___;
 	ret
-.size	${PREFIX}_ecb_encrypt,.-${PREFIX}_ecb_encrypt
+.size	aesni_ecb_encrypt,.-aesni_ecb_encrypt
 ___
 
 {
@@ -964,10 +964,10 @@
 my $bswap_mask="%xmm7";
 
 $code.=<<___;
-.globl	${PREFIX}_ccm64_encrypt_blocks
-.type	${PREFIX}_ccm64_encrypt_blocks,\@function,6
+.globl	aesni_ccm64_encrypt_blocks
+.type	aesni_ccm64_encrypt_blocks,\@function,6
 .align	16
-${PREFIX}_ccm64_encrypt_blocks:
+aesni_ccm64_encrypt_blocks:
 ___
 $code.=<<___ if ($win64);
 	lea	-0x58(%rsp),%rsp
@@ -1050,14 +1050,14 @@
 ___
 $code.=<<___;
 	ret
-.size	${PREFIX}_ccm64_encrypt_blocks,.-${PREFIX}_ccm64_encrypt_blocks
+.size	aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks
 ___
 ######################################################################
 $code.=<<___;
-.globl	${PREFIX}_ccm64_decrypt_blocks
-.type	${PREFIX}_ccm64_decrypt_blocks,\@function,6
+.globl	aesni_ccm64_decrypt_blocks
+.type	aesni_ccm64_decrypt_blocks,\@function,6
 .align	16
-${PREFIX}_ccm64_decrypt_blocks:
+aesni_ccm64_decrypt_blocks:
 ___
 $code.=<<___ if ($win64);
 	lea	-0x58(%rsp),%rsp
@@ -1157,7 +1157,7 @@
 ___
 $code.=<<___;
 	ret
-.size	${PREFIX}_ccm64_decrypt_blocks,.-${PREFIX}_ccm64_decrypt_blocks
+.size	aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks
 ___
 }
 ######################################################################
@@ -1178,10 +1178,10 @@
 my $frame_size = 0x80 + ($win64?160:0);
 
 $code.=<<___;
-.globl	${PREFIX}_ctr32_encrypt_blocks
-.type	${PREFIX}_ctr32_encrypt_blocks,\@function,5
+.globl	aesni_ctr32_encrypt_blocks
+.type	aesni_ctr32_encrypt_blocks,\@function,5
 .align	16
-${PREFIX}_ctr32_encrypt_blocks:
+aesni_ctr32_encrypt_blocks:
 .cfi_startproc
 	cmp	\$1,$len
 	jne	.Lctr32_bulk
@@ -1734,7 +1734,7 @@
 .Lctr32_epilogue:
 	ret
 .cfi_endproc
-.size	${PREFIX}_ctr32_encrypt_blocks,.-${PREFIX}_ctr32_encrypt_blocks
+.size	aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks
 ___
 }
 
@@ -1751,10 +1751,10 @@
 my $key_ = "%rbp";	# override so that we can use %r11 as FP
 
 $code.=<<___;
-.globl	${PREFIX}_xts_encrypt
-.type	${PREFIX}_xts_encrypt,\@function,6
+.globl	aesni_xts_encrypt
+.type	aesni_xts_encrypt,\@function,6
 .align	16
-${PREFIX}_xts_encrypt:
+aesni_xts_encrypt:
 .cfi_startproc
 	lea	(%rsp),%r11			# frame pointer
 .cfi_def_cfa_register	%r11
@@ -2230,14 +2230,14 @@
 .Lxts_enc_epilogue:
 	ret
 .cfi_endproc
-.size	${PREFIX}_xts_encrypt,.-${PREFIX}_xts_encrypt
+.size	aesni_xts_encrypt,.-aesni_xts_encrypt
 ___
 
 $code.=<<___;
-.globl	${PREFIX}_xts_decrypt
-.type	${PREFIX}_xts_decrypt,\@function,6
+.globl	aesni_xts_decrypt
+.type	aesni_xts_decrypt,\@function,6
 .align	16
-${PREFIX}_xts_decrypt:
+aesni_xts_decrypt:
 .cfi_startproc
 	lea	(%rsp),%r11			# frame pointer
 .cfi_def_cfa_register	%r11
@@ -2739,7 +2739,7 @@
 .Lxts_dec_epilogue:
 	ret
 .cfi_endproc
-.size	${PREFIX}_xts_decrypt,.-${PREFIX}_xts_decrypt
+.size	aesni_xts_decrypt,.-aesni_xts_decrypt
 ___
 }
 
@@ -2759,10 +2759,10 @@
 my $blocks = $len;
 
 $code.=<<___;
-.globl	${PREFIX}_ocb_encrypt
-.type	${PREFIX}_ocb_encrypt,\@function,6
+.globl	aesni_ocb_encrypt
+.type	aesni_ocb_encrypt,\@function,6
 .align	32
-${PREFIX}_ocb_encrypt:
+aesni_ocb_encrypt:
 .cfi_startproc
 	lea	(%rsp),%rax
 	push	%rbx
@@ -3011,7 +3011,7 @@
 .Locb_enc_epilogue:
 	ret
 .cfi_endproc
-.size	${PREFIX}_ocb_encrypt,.-${PREFIX}_ocb_encrypt
+.size	aesni_ocb_encrypt,.-aesni_ocb_encrypt
 
 .type	__ocb_encrypt6,\@abi-omnipotent
 .align	32
@@ -3219,10 +3219,10 @@
 	ret
 .size	__ocb_encrypt1,.-__ocb_encrypt1
 
-.globl	${PREFIX}_ocb_decrypt
-.type	${PREFIX}_ocb_decrypt,\@function,6
+.globl	aesni_ocb_decrypt
+.type	aesni_ocb_decrypt,\@function,6
 .align	32
-${PREFIX}_ocb_decrypt:
+aesni_ocb_decrypt:
 .cfi_startproc
 	lea	(%rsp),%rax
 	push	%rbx
@@ -3493,7 +3493,7 @@
 .Locb_dec_epilogue:
 	ret
 .cfi_endproc
-.size	${PREFIX}_ocb_decrypt,.-${PREFIX}_ocb_decrypt
+.size	aesni_ocb_decrypt,.-aesni_ocb_decrypt
 
 .type	__ocb_decrypt6,\@abi-omnipotent
 .align	32
@@ -4736,7 +4736,7 @@
 $code.=<<___;
 .extern	__imp_RtlVirtualUnwind
 ___
-$code.=<<___ if ($PREFIX eq "aes_hw");
+$code.=<<___ if ($PREFIX eq "aesni");
 .type	ecb_ccm64_se_handler,\@abi-omnipotent
 .align	16
 ecb_ccm64_se_handler:
@@ -4776,7 +4776,7 @@
 	lea	0x58(%rax),%rax		# adjust stack pointer
 
 	jmp	.Lcommon_seh_tail
-.size	${PREFIX}_ccm64_se_handler,.-${PREFIX}_ccm64_se_handler
+.size	ecb_ccm64_se_handler,.-ecb_ccm64_se_handler
 
 .type	ctr_xts_se_handler,\@abi-omnipotent
 .align	16
@@ -4968,37 +4968,37 @@
 .section	.pdata
 .align	4
 ___
-$code.=<<___ if ($PREFIX eq "aes_hw");
-	.rva	.LSEH_begin_${PREFIX}_ecb_encrypt
-	.rva	.LSEH_end_${PREFIX}_ecb_encrypt
+$code.=<<___ if ($PREFIX eq "aesni");
+	.rva	.LSEH_begin_aesni_ecb_encrypt
+	.rva	.LSEH_end_aesni_ecb_encrypt
 	.rva	.LSEH_info_ecb
 
-	.rva	.LSEH_begin_${PREFIX}_ccm64_encrypt_blocks
-	.rva	.LSEH_end_${PREFIX}_ccm64_encrypt_blocks
+	.rva	.LSEH_begin_aesni_ccm64_encrypt_blocks
+	.rva	.LSEH_end_aesni_ccm64_encrypt_blocks
 	.rva	.LSEH_info_ccm64_enc
 
-	.rva	.LSEH_begin_${PREFIX}_ccm64_decrypt_blocks
-	.rva	.LSEH_end_${PREFIX}_ccm64_decrypt_blocks
+	.rva	.LSEH_begin_aesni_ccm64_decrypt_blocks
+	.rva	.LSEH_end_aesni_ccm64_decrypt_blocks
 	.rva	.LSEH_info_ccm64_dec
 
-	.rva	.LSEH_begin_${PREFIX}_ctr32_encrypt_blocks
-	.rva	.LSEH_end_${PREFIX}_ctr32_encrypt_blocks
+	.rva	.LSEH_begin_aesni_ctr32_encrypt_blocks
+	.rva	.LSEH_end_aesni_ctr32_encrypt_blocks
 	.rva	.LSEH_info_ctr32
 
-	.rva	.LSEH_begin_${PREFIX}_xts_encrypt
-	.rva	.LSEH_end_${PREFIX}_xts_encrypt
+	.rva	.LSEH_begin_aesni_xts_encrypt
+	.rva	.LSEH_end_aesni_xts_encrypt
 	.rva	.LSEH_info_xts_enc
 
-	.rva	.LSEH_begin_${PREFIX}_xts_decrypt
-	.rva	.LSEH_end_${PREFIX}_xts_decrypt
+	.rva	.LSEH_begin_aesni_xts_decrypt
+	.rva	.LSEH_end_aesni_xts_decrypt
 	.rva	.LSEH_info_xts_dec
 
-	.rva	.LSEH_begin_${PREFIX}_ocb_encrypt
-	.rva	.LSEH_end_${PREFIX}_ocb_encrypt
+	.rva	.LSEH_begin_aesni_ocb_encrypt
+	.rva	.LSEH_end_aesni_ocb_encrypt
 	.rva	.LSEH_info_ocb_enc
 
-	.rva	.LSEH_begin_${PREFIX}_ocb_decrypt
-	.rva	.LSEH_end_${PREFIX}_ocb_decrypt
+	.rva	.LSEH_begin_aesni_ocb_decrypt
+	.rva	.LSEH_end_aesni_ocb_decrypt
 	.rva	.LSEH_info_ocb_dec
 ___
 $code.=<<___;
@@ -5016,7 +5016,7 @@
 .section	.xdata
 .align	8
 ___
-$code.=<<___ if ($PREFIX eq "aes_hw");
+$code.=<<___ if ($PREFIX eq "aesni");
 .LSEH_info_ecb:
 	.byte	9,0,0,0
 	.rva	ecb_ccm64_se_handler
diff --git a/crypto/fipsmodule/aes/internal.h b/crypto/fipsmodule/aes/internal.h
index 7c19b9c..45db9ee 100644
--- a/crypto/fipsmodule/aes/internal.h
+++ b/crypto/fipsmodule/aes/internal.h
@@ -24,30 +24,21 @@
 #endif
 
 
-#if !defined(OPENSSL_NO_ASM)
-
-#if defined(OPENSSL_X86_64)
-#define HWAES
-#define HWAES_ECB
-
-static int hwaes_capable(void) {
-  return (OPENSSL_ia32cap_P[1] & (1 << (57 - 32))) != 0;
-}
-#elif defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)
+#if !defined(OPENSSL_NO_ASM) && (defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64))
 #define HWAES
 
 static int hwaes_capable(void) {
   return CRYPTO_is_ARMv8_AES_capable();
 }
-#elif defined(OPENSSL_PPC64LE)
+#endif  // !NO_ASM && (ARM || AARCH64)
+
+#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_PPC64LE)
 #define HWAES
 
 static int hwaes_capable(void) {
   return CRYPTO_is_PPC64LE_vcrypto_capable();
 }
-#endif
-
-#endif  // !NO_ASM
+#endif  // !NO_ASM && PPC64LE
 
 
 #if defined(HWAES)
@@ -102,12 +93,6 @@
 
 #endif  // !HWAES
 
-
-#if defined(HWAES_ECB)
-void aes_hw_ecb_encrypt(const uint8_t *in, uint8_t *out, size_t length,
-                        const AES_KEY *key, const int enc);
-#endif
-
 #if defined(__cplusplus)
 }  // extern C
 #endif
diff --git a/crypto/fipsmodule/aes/mode_wrappers.c b/crypto/fipsmodule/aes/mode_wrappers.c
index bcfd1d2..34514db 100644
--- a/crypto/fipsmodule/aes/mode_wrappers.c
+++ b/crypto/fipsmodule/aes/mode_wrappers.c
@@ -6,7 +6,7 @@
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
+ *    notice, this list of conditions and the following disclaimer. 
  *
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in
@@ -50,7 +50,6 @@
 
 #include <assert.h>
 
-#include "../aes/internal.h"
 #include "../modes/internal.h"
 
 
@@ -73,33 +72,27 @@
   }
 }
 
-#if !defined(OPENSSL_NO_ASM) && (defined(OPENSSL_X86_64) || defined(OPENSSL_X86))
-void asm_AES_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t len,
-                         const AES_KEY *key, uint8_t *ivec, const int enc);
-#endif
-
+#if defined(OPENSSL_NO_ASM) || \
+    (!defined(OPENSSL_X86_64) && !defined(OPENSSL_X86))
 void AES_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t len,
                      const AES_KEY *key, uint8_t *ivec, const int enc) {
-#if defined(HWAES) && !defined(OPENSSL_AARCH64)
-  // Can't use |aes_hw_cbc_encrypt| here because |AES_set_encrypt_key| isn't
-  // compatible with it.
-  if (hwaes_capable()) {
-    aes_hw_cbc_encrypt(in, out, len, key, ivec, enc);
-    return;
-  }
-#endif
 
-#if !defined(OPENSSL_NO_ASM) && \
-    (defined(OPENSSL_X86_64) || defined(OPENSSL_X86))
-  asm_AES_cbc_encrypt(in, out, len, key, ivec, enc);
-#else
   if (enc) {
     CRYPTO_cbc128_encrypt(in, out, len, key, ivec, (block128_f)AES_encrypt);
   } else {
     CRYPTO_cbc128_decrypt(in, out, len, key, ivec, (block128_f)AES_decrypt);
   }
-#endif
 }
+#else
+
+void asm_AES_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t len,
+                         const AES_KEY *key, uint8_t *ivec, const int enc);
+void AES_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t len,
+                     const AES_KEY *key, uint8_t *ivec, const int enc) {
+  asm_AES_cbc_encrypt(in, out, len, key, ivec, enc);
+}
+
+#endif  // OPENSSL_NO_ASM || (!OPENSSL_X86_64 && !OPENSSL_X86)
 
 void AES_ofb128_encrypt(const uint8_t *in, uint8_t *out, size_t length,
                         const AES_KEY *key, uint8_t *ivec, int *num) {
diff --git a/crypto/fipsmodule/cipher/e_aes.c b/crypto/fipsmodule/cipher/e_aes.c
index 9c482ad..3617e48 100644
--- a/crypto/fipsmodule/cipher/e_aes.c
+++ b/crypto/fipsmodule/cipher/e_aes.c
@@ -189,6 +189,38 @@
 }
 #endif
 
+#if !defined(OPENSSL_NO_ASM) && \
+    (defined(OPENSSL_X86_64) || defined(OPENSSL_X86))
+int aesni_set_encrypt_key(const uint8_t *userKey, int bits, AES_KEY *key);
+int aesni_set_decrypt_key(const uint8_t *userKey, int bits, AES_KEY *key);
+
+void aesni_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
+void aesni_decrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
+
+void aesni_ecb_encrypt(const uint8_t *in, uint8_t *out, size_t length,
+                       const AES_KEY *key, int enc);
+void aesni_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t length,
+                       const AES_KEY *key, uint8_t *ivec, int enc);
+
+#else
+
+// On other platforms, aesni_capable() will always return false and so the
+// following will never be called.
+static void aesni_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key) {
+  abort();
+}
+static int aesni_set_encrypt_key(const uint8_t *userKey, int bits,
+                                 AES_KEY *key) {
+  abort();
+}
+static void aesni_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out,
+                                       size_t blocks, const void *key,
+                                       const uint8_t *ivec) {
+  abort();
+}
+
+#endif
+
 static int aes_init_key(EVP_CIPHER_CTX *ctx, const uint8_t *key,
                         const uint8_t *iv, int enc) {
   int ret, mode;
@@ -305,9 +337,22 @@
   return 1;
 }
 
+static char aesni_capable(void);
+
 ctr128_f aes_ctr_set_key(AES_KEY *aes_key, GCM128_CONTEXT *gcm_ctx,
                          block128_f *out_block, const uint8_t *key,
                          size_t key_bytes) {
+  if (aesni_capable()) {
+    aesni_set_encrypt_key(key, key_bytes * 8, aes_key);
+    if (gcm_ctx != NULL) {
+      CRYPTO_gcm128_init(gcm_ctx, aes_key, (block128_f)aesni_encrypt, 1);
+    }
+    if (out_block) {
+      *out_block = (block128_f) aesni_encrypt;
+    }
+    return (ctr128_f)aesni_ctr32_encrypt_blocks;
+  }
+
   if (hwaes_capable()) {
     aes_hw_set_encrypt_key(key, key_bytes * 8, aes_key);
     if (gcm_ctx != NULL) {
@@ -766,22 +811,126 @@
   out->ctrl = aes_gcm_ctrl;
 }
 
-#if defined(HWAES_ECB)
+#if !defined(OPENSSL_NO_ASM) && \
+    (defined(OPENSSL_X86_64) || defined(OPENSSL_X86))
 
-static int aes_hw_ecb_cipher(EVP_CIPHER_CTX *ctx, uint8_t *out,
-                             const uint8_t *in, size_t len) {
+// AES-NI section.
+
+static char aesni_capable(void) {
+  return (OPENSSL_ia32cap_P[1] & (1 << (57 - 32))) != 0;
+}
+
+static int aesni_init_key(EVP_CIPHER_CTX *ctx, const uint8_t *key,
+                          const uint8_t *iv, int enc) {
+  int ret, mode;
+  EVP_AES_KEY *dat = (EVP_AES_KEY *)ctx->cipher_data;
+
+  mode = ctx->cipher->flags & EVP_CIPH_MODE_MASK;
+  if ((mode == EVP_CIPH_ECB_MODE || mode == EVP_CIPH_CBC_MODE) && !enc) {
+    ret = aesni_set_decrypt_key(key, ctx->key_len * 8, ctx->cipher_data);
+    dat->block = (block128_f)aesni_decrypt;
+    dat->stream.cbc =
+        mode == EVP_CIPH_CBC_MODE ? (cbc128_f)aesni_cbc_encrypt : NULL;
+  } else {
+    ret = aesni_set_encrypt_key(key, ctx->key_len * 8, ctx->cipher_data);
+    dat->block = (block128_f)aesni_encrypt;
+    if (mode == EVP_CIPH_CBC_MODE) {
+      dat->stream.cbc = (cbc128_f)aesni_cbc_encrypt;
+    } else if (mode == EVP_CIPH_CTR_MODE) {
+      dat->stream.ctr = (ctr128_f)aesni_ctr32_encrypt_blocks;
+    } else {
+      dat->stream.cbc = NULL;
+    }
+  }
+
+  if (ret < 0) {
+    OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_AES_KEY_SETUP_FAILED);
+    return 0;
+  }
+
+  return 1;
+}
+
+static int aesni_cbc_cipher(EVP_CIPHER_CTX *ctx, uint8_t *out,
+                            const uint8_t *in, size_t len) {
+  aesni_cbc_encrypt(in, out, len, ctx->cipher_data, ctx->iv, ctx->encrypt);
+
+  return 1;
+}
+
+static int aesni_ecb_cipher(EVP_CIPHER_CTX *ctx, uint8_t *out,
+                            const uint8_t *in, size_t len) {
   size_t bl = ctx->cipher->block_size;
 
   if (len < bl) {
     return 1;
   }
 
-  aes_hw_ecb_encrypt(in, out, len, ctx->cipher_data, ctx->encrypt);
+  aesni_ecb_encrypt(in, out, len, ctx->cipher_data, ctx->encrypt);
 
   return 1;
 }
 
-DEFINE_LOCAL_DATA(EVP_CIPHER, aes_hw_128_ecb) {
+static int aesni_gcm_init_key(EVP_CIPHER_CTX *ctx, const uint8_t *key,
+                              const uint8_t *iv, int enc) {
+  EVP_AES_GCM_CTX *gctx = ctx->cipher_data;
+  if (!iv && !key) {
+    return 1;
+  }
+  if (key) {
+    aesni_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks);
+    CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks, (block128_f)aesni_encrypt, 1);
+    gctx->ctr = (ctr128_f)aesni_ctr32_encrypt_blocks;
+    // If we have an IV, we can set it directly; otherwise use the
+    // saved IV.
+    if (iv == NULL && gctx->iv_set) {
+      iv = gctx->iv;
+    }
+    if (iv) {
+      CRYPTO_gcm128_setiv(&gctx->gcm, &gctx->ks.ks, iv, gctx->ivlen);
+      gctx->iv_set = 1;
+    }
+    gctx->key_set = 1;
+  } else {
+    // If the key is set, use the IV now; otherwise save a copy of it.
+    if (gctx->key_set) {
+      CRYPTO_gcm128_setiv(&gctx->gcm, &gctx->ks.ks, iv, gctx->ivlen);
+    } else {
+      OPENSSL_memcpy(gctx->iv, iv, gctx->ivlen);
+    }
+    gctx->iv_set = 1;
+    gctx->iv_gen = 0;
+  }
+  return 1;
+}
+
+DEFINE_LOCAL_DATA(EVP_CIPHER, aesni_128_cbc) {
+  memset(out, 0, sizeof(EVP_CIPHER));
+
+  out->nid = NID_aes_128_cbc;
+  out->block_size = 16;
+  out->key_len = 16;
+  out->iv_len = 16;
+  out->ctx_size = sizeof(EVP_AES_KEY);
+  out->flags = EVP_CIPH_CBC_MODE;
+  out->init = aesni_init_key;
+  out->cipher = aesni_cbc_cipher;
+}
+
+DEFINE_LOCAL_DATA(EVP_CIPHER, aesni_128_ctr) {
+  memset(out, 0, sizeof(EVP_CIPHER));
+
+  out->nid = NID_aes_128_ctr;
+  out->block_size = 1;
+  out->key_len = 16;
+  out->iv_len = 16;
+  out->ctx_size = sizeof(EVP_AES_KEY);
+  out->flags = EVP_CIPH_CTR_MODE;
+  out->init = aesni_init_key;
+  out->cipher = aes_ctr_cipher;
+}
+
+DEFINE_LOCAL_DATA(EVP_CIPHER, aesni_128_ecb) {
   memset(out, 0, sizeof(EVP_CIPHER));
 
   out->nid = NID_aes_128_ecb;
@@ -789,11 +938,67 @@
   out->key_len = 16;
   out->ctx_size = sizeof(EVP_AES_KEY);
   out->flags = EVP_CIPH_ECB_MODE;
-  out->init = aes_init_key;
-  out->cipher = aes_hw_ecb_cipher;
+  out->init = aesni_init_key;
+  out->cipher = aesni_ecb_cipher;
 }
 
-DEFINE_LOCAL_DATA(EVP_CIPHER, aes_hw_192_ecb) {
+DEFINE_LOCAL_DATA(EVP_CIPHER, aesni_128_ofb) {
+  memset(out, 0, sizeof(EVP_CIPHER));
+
+  out->nid = NID_aes_128_ofb128;
+  out->block_size = 1;
+  out->key_len = 16;
+  out->iv_len = 16;
+  out->ctx_size = sizeof(EVP_AES_KEY);
+  out->flags = EVP_CIPH_OFB_MODE;
+  out->init = aesni_init_key;
+  out->cipher = aes_ofb_cipher;
+}
+
+DEFINE_LOCAL_DATA(EVP_CIPHER, aesni_128_gcm) {
+  memset(out, 0, sizeof(EVP_CIPHER));
+
+  out->nid = NID_aes_128_gcm;
+  out->block_size = 1;
+  out->key_len = 16;
+  out->iv_len = 12;
+  out->ctx_size = sizeof(EVP_AES_GCM_CTX);
+  out->flags = EVP_CIPH_GCM_MODE | EVP_CIPH_CUSTOM_IV |
+               EVP_CIPH_FLAG_CUSTOM_CIPHER | EVP_CIPH_ALWAYS_CALL_INIT |
+               EVP_CIPH_CTRL_INIT | EVP_CIPH_FLAG_AEAD_CIPHER;
+  out->init = aesni_gcm_init_key;
+  out->cipher = aes_gcm_cipher;
+  out->cleanup = aes_gcm_cleanup;
+  out->ctrl = aes_gcm_ctrl;
+}
+
+DEFINE_LOCAL_DATA(EVP_CIPHER, aesni_192_cbc) {
+  memset(out, 0, sizeof(EVP_CIPHER));
+
+  out->nid = NID_aes_192_cbc;
+  out->block_size = 16;
+  out->key_len = 24;
+  out->iv_len = 16;
+  out->ctx_size = sizeof(EVP_AES_KEY);
+  out->flags = EVP_CIPH_CBC_MODE;
+  out->init = aesni_init_key;
+  out->cipher = aesni_cbc_cipher;
+}
+
+DEFINE_LOCAL_DATA(EVP_CIPHER, aesni_192_ctr) {
+  memset(out, 0, sizeof(EVP_CIPHER));
+
+  out->nid = NID_aes_192_ctr;
+  out->block_size = 1;
+  out->key_len = 24;
+  out->iv_len = 16;
+  out->ctx_size = sizeof(EVP_AES_KEY);
+  out->flags = EVP_CIPH_CTR_MODE;
+  out->init = aesni_init_key;
+  out->cipher = aes_ctr_cipher;
+}
+
+DEFINE_LOCAL_DATA(EVP_CIPHER, aesni_192_ecb) {
   memset(out, 0, sizeof(EVP_CIPHER));
 
   out->nid = NID_aes_192_ecb;
@@ -801,11 +1006,54 @@
   out->key_len = 24;
   out->ctx_size = sizeof(EVP_AES_KEY);
   out->flags = EVP_CIPH_ECB_MODE;
-  out->init = aes_init_key;
-  out->cipher = aes_hw_ecb_cipher;
+  out->init = aesni_init_key;
+  out->cipher = aesni_ecb_cipher;
 }
 
-DEFINE_LOCAL_DATA(EVP_CIPHER, aes_hw_256_ecb) {
+DEFINE_LOCAL_DATA(EVP_CIPHER, aesni_192_gcm) {
+  memset(out, 0, sizeof(EVP_CIPHER));
+
+  out->nid = NID_aes_192_gcm;
+  out->block_size = 1;
+  out->key_len = 24;
+  out->iv_len = 12;
+  out->ctx_size = sizeof(EVP_AES_GCM_CTX);
+  out->flags = EVP_CIPH_GCM_MODE | EVP_CIPH_CUSTOM_IV |
+               EVP_CIPH_FLAG_CUSTOM_CIPHER | EVP_CIPH_ALWAYS_CALL_INIT |
+               EVP_CIPH_CTRL_INIT | EVP_CIPH_FLAG_AEAD_CIPHER;
+  out->init = aesni_gcm_init_key;
+  out->cipher = aes_gcm_cipher;
+  out->cleanup = aes_gcm_cleanup;
+  out->ctrl = aes_gcm_ctrl;
+}
+
+DEFINE_LOCAL_DATA(EVP_CIPHER, aesni_256_cbc) {
+  memset(out, 0, sizeof(EVP_CIPHER));
+
+  out->nid = NID_aes_256_cbc;
+  out->block_size = 16;
+  out->key_len = 32;
+  out->iv_len = 16;
+  out->ctx_size = sizeof(EVP_AES_KEY);
+  out->flags = EVP_CIPH_CBC_MODE;
+  out->init = aesni_init_key;
+  out->cipher = aesni_cbc_cipher;
+}
+
+DEFINE_LOCAL_DATA(EVP_CIPHER, aesni_256_ctr) {
+  memset(out, 0, sizeof(EVP_CIPHER));
+
+  out->nid = NID_aes_256_ctr;
+  out->block_size = 1;
+  out->key_len = 32;
+  out->iv_len = 16;
+  out->ctx_size = sizeof(EVP_AES_KEY);
+  out->flags = EVP_CIPH_CTR_MODE;
+  out->init = aesni_init_key;
+  out->cipher = aes_ctr_cipher;
+}
+
+DEFINE_LOCAL_DATA(EVP_CIPHER, aesni_256_ecb) {
   memset(out, 0, sizeof(EVP_CIPHER));
 
   out->nid = NID_aes_256_ecb;
@@ -813,50 +1061,80 @@
   out->key_len = 32;
   out->ctx_size = sizeof(EVP_AES_KEY);
   out->flags = EVP_CIPH_ECB_MODE;
-  out->init = aes_init_key;
-  out->cipher = aes_hw_ecb_cipher;
+  out->init = aesni_init_key;
+  out->cipher = aesni_ecb_cipher;
 }
 
-#define EVP_ECB_CIPHER_FUNCTION(keybits)            \
-  const EVP_CIPHER *EVP_aes_##keybits##_ecb(void) { \
-    if (hwaes_capable()) {                          \
-      return aes_hw_##keybits##_ecb();              \
-    }                                               \
-    return aes_##keybits##_ecb_generic();           \
+DEFINE_LOCAL_DATA(EVP_CIPHER, aesni_256_ofb) {
+  memset(out, 0, sizeof(EVP_CIPHER));
+
+  out->nid = NID_aes_256_ofb128;
+  out->block_size = 1;
+  out->key_len = 32;
+  out->iv_len = 16;
+  out->ctx_size = sizeof(EVP_AES_KEY);
+  out->flags = EVP_CIPH_OFB_MODE;
+  out->init = aesni_init_key;
+  out->cipher = aes_ofb_cipher;
+}
+
+DEFINE_LOCAL_DATA(EVP_CIPHER, aesni_256_gcm) {
+  memset(out, 0, sizeof(EVP_CIPHER));
+
+  out->nid = NID_aes_256_gcm;
+  out->block_size = 1;
+  out->key_len = 32;
+  out->iv_len = 12;
+  out->ctx_size = sizeof(EVP_AES_GCM_CTX);
+  out->flags = EVP_CIPH_GCM_MODE | EVP_CIPH_CUSTOM_IV |
+               EVP_CIPH_FLAG_CUSTOM_CIPHER | EVP_CIPH_ALWAYS_CALL_INIT |
+               EVP_CIPH_CTRL_INIT | EVP_CIPH_CUSTOM_COPY |
+               EVP_CIPH_FLAG_AEAD_CIPHER;
+  out->init = aesni_gcm_init_key;
+  out->cipher = aes_gcm_cipher;
+  out->cleanup = aes_gcm_cleanup;
+  out->ctrl = aes_gcm_ctrl;
+}
+
+#define EVP_CIPHER_FUNCTION(keybits, mode)             \
+  const EVP_CIPHER *EVP_aes_##keybits##_##mode(void) { \
+    if (aesni_capable()) {                             \
+      return aesni_##keybits##_##mode();               \
+    } else {                                           \
+      return aes_##keybits##_##mode##_generic();       \
+    }                                                  \
   }
 
-#else
+#else  // ^^^  OPENSSL_X86_64 || OPENSSL_X86
 
-#define EVP_ECB_CIPHER_FUNCTION(keybits)            \
-  const EVP_CIPHER *EVP_aes_##keybits##_ecb(void) { \
-    return aes_##keybits##_ecb_generic();           \
-  }
-
-#endif  // HWAES_ECB
+static char aesni_capable(void) {
+  return 0;
+}
 
 #define EVP_CIPHER_FUNCTION(keybits, mode)             \
   const EVP_CIPHER *EVP_aes_##keybits##_##mode(void) { \
     return aes_##keybits##_##mode##_generic();         \
   }
 
+#endif
+
 EVP_CIPHER_FUNCTION(128, cbc)
 EVP_CIPHER_FUNCTION(128, ctr)
+EVP_CIPHER_FUNCTION(128, ecb)
 EVP_CIPHER_FUNCTION(128, ofb)
 EVP_CIPHER_FUNCTION(128, gcm)
 
 EVP_CIPHER_FUNCTION(192, cbc)
 EVP_CIPHER_FUNCTION(192, ctr)
+EVP_CIPHER_FUNCTION(192, ecb)
 EVP_CIPHER_FUNCTION(192, gcm)
 
 EVP_CIPHER_FUNCTION(256, cbc)
 EVP_CIPHER_FUNCTION(256, ctr)
+EVP_CIPHER_FUNCTION(256, ecb)
 EVP_CIPHER_FUNCTION(256, ofb)
 EVP_CIPHER_FUNCTION(256, gcm)
 
-EVP_ECB_CIPHER_FUNCTION(128)
-EVP_ECB_CIPHER_FUNCTION(192)
-EVP_ECB_CIPHER_FUNCTION(256)
-
 
 #define EVP_AEAD_AES_GCM_TAG_LEN 16
 
@@ -1151,7 +1429,7 @@
 
 int EVP_has_aes_hardware(void) {
 #if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
-  return hwaes_capable() && crypto_gcm_clmul_enabled();
+  return aesni_capable() && crypto_gcm_clmul_enabled();
 #elif defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)
   return hwaes_capable() && CRYPTO_is_ARMv8_PMULL_capable();
 #else