Extract the AES-NI encrypt -> decrypt assembly conversion

aes_hw_set_decrypt_key calls aes_hw_set_encrypt_key and then does a
conversion, all in assembly. On x86(_64), aes_hw_set_encrypt_key
internally checks OPENSSL_ia32cap_P to call one of two variants.

In preparation for splitting those variants into separate functions, get
the in-asm function call out of the way by extracting an
aes_hw_encrypt_key_to_decrypt_key function.

Bug: 673
Change-Id: I23eefc00bdc8cb1f20e17fb6716974e91f1c32c4
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/68689
Reviewed-by: Bob Beck <bbe@google.com>
Commit-Queue: David Benjamin <davidben@google.com>
diff --git a/gen/bcm/aesni-x86_64-apple.S b/gen/bcm/aesni-x86_64-apple.S
index 48d3cfc..ccf9f8f 100644
--- a/gen/bcm/aesni-x86_64-apple.S
+++ b/gen/bcm/aesni-x86_64-apple.S
@@ -1905,61 +1905,51 @@
 	ret
 
 
-.globl	_aes_hw_set_decrypt_key
-.private_extern _aes_hw_set_decrypt_key
+.globl	_aes_hw_encrypt_key_to_decrypt_key
+.private_extern _aes_hw_encrypt_key_to_decrypt_key
 
 .p2align	4
-_aes_hw_set_decrypt_key:
-
+_aes_hw_encrypt_key_to_decrypt_key:
 
 _CET_ENDBR
-	subq	$8,%rsp
 
-
-
-	call	__aesni_set_encrypt_key
+	movl	240(%rdi),%esi
 	shll	$4,%esi
-	testl	%eax,%eax
-	jnz	L$dec_key_ret
-	leaq	16(%rdx,%rsi,1),%rdi
 
-	movups	(%rdx),%xmm0
-	movups	(%rdi),%xmm1
-	movups	%xmm0,(%rdi)
-	movups	%xmm1,(%rdx)
-	leaq	16(%rdx),%rdx
-	leaq	-16(%rdi),%rdi
+	leaq	16(%rdi,%rsi,1),%rdx
+
+	movups	(%rdi),%xmm0
+	movups	(%rdx),%xmm1
+	movups	%xmm0,(%rdx)
+	movups	%xmm1,(%rdi)
+	leaq	16(%rdi),%rdi
+	leaq	-16(%rdx),%rdx
 
 L$dec_key_inverse:
-	movups	(%rdx),%xmm0
-	movups	(%rdi),%xmm1
+	movups	(%rdi),%xmm0
+	movups	(%rdx),%xmm1
 .byte	102,15,56,219,192
 .byte	102,15,56,219,201
-	leaq	16(%rdx),%rdx
-	leaq	-16(%rdi),%rdi
-	movups	%xmm0,16(%rdi)
-	movups	%xmm1,-16(%rdx)
-	cmpq	%rdx,%rdi
+	leaq	16(%rdi),%rdi
+	leaq	-16(%rdx),%rdx
+	movups	%xmm0,16(%rdx)
+	movups	%xmm1,-16(%rdi)
+	cmpq	%rdi,%rdx
 	ja	L$dec_key_inverse
 
-	movups	(%rdx),%xmm0
+	movups	(%rdi),%xmm0
 .byte	102,15,56,219,192
 	pxor	%xmm1,%xmm1
-	movups	%xmm0,(%rdi)
+	movups	%xmm0,(%rdx)
 	pxor	%xmm0,%xmm0
-L$dec_key_ret:
-	addq	$8,%rsp
-
 	ret
 
 
-
 .globl	_aes_hw_set_encrypt_key
 .private_extern _aes_hw_set_encrypt_key
 
 .p2align	4
 _aes_hw_set_encrypt_key:
-__aesni_set_encrypt_key:
 
 
 _CET_ENDBR
@@ -2331,7 +2321,6 @@
 	xorps	%xmm1,%xmm2
 	ret
 
-
 .section	__DATA,__const
 .p2align	6
 L$bswap_mask: