Extract the AES-NI encrypt -> decrypt assembly conversion
aes_hw_set_decrypt_key calls aes_hw_set_encrypt_key and then does a
conversion, all in assembly. On x86(_64), aes_hw_set_encrypt_key
internally checks OPENSSL_ia32cap_P to call one of two variants.
In preparation for splitting those variants into separate functions, get
the in-asm function call out of the way by extracting an
aes_hw_encrypt_key_to_decrypt_key function.
Bug: 673
Change-Id: I23eefc00bdc8cb1f20e17fb6716974e91f1c32c4
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/68689
Reviewed-by: Bob Beck <bbe@google.com>
Commit-Queue: David Benjamin <davidben@google.com>
diff --git a/crypto/fipsmodule/aes/aes.c b/crypto/fipsmodule/aes/aes.c
index 60f3545..56dfbe2 100644
--- a/crypto/fipsmodule/aes/aes.c
+++ b/crypto/fipsmodule/aes/aes.c
@@ -104,3 +104,16 @@
return aes_nohw_set_decrypt_key(key, bits, aeskey);
}
}
+
+#if defined(HWAES) && (defined(OPENSSL_X86) || defined(OPENSSL_X86_64))
+// On x86 and x86_64, only |aes_hw_encrypt_key_to_decrypt_key| is implemented
+// in assembly. |aes_hw_set_decrypt_key| is C code that expands the encryption
+// key schedule and, if that succeeds, converts it in place for decryption.
+int aes_hw_set_decrypt_key(const uint8_t *user_key, int bits, AES_KEY *key) {
+ int ret = aes_hw_set_encrypt_key(user_key, bits, key);
+ if (ret == 0) {
+ aes_hw_encrypt_key_to_decrypt_key(key);
+ }
+ return ret;
+}
+#endif
diff --git a/crypto/fipsmodule/aes/aes_test.cc b/crypto/fipsmodule/aes/aes_test.cc
index d4a458b..dc90067 100644
--- a/crypto/fipsmodule/aes/aes_test.cc
+++ b/crypto/fipsmodule/aes/aes_test.cc
@@ -346,7 +346,12 @@
#endif
}
+#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
+ ASSERT_EQ(CHECK_ABI_SEH(aes_hw_set_encrypt_key, kKey, bits, &key), 0);
+ CHECK_ABI_SEH(aes_hw_encrypt_key_to_decrypt_key, &key);
+#else
ASSERT_EQ(CHECK_ABI_SEH(aes_hw_set_decrypt_key, kKey, bits, &key), 0);
+#endif
CHECK_ABI(aes_hw_decrypt, block, block, &key);
for (size_t blocks : block_counts) {
SCOPED_TRACE(blocks);
diff --git a/crypto/fipsmodule/aes/asm/aesni-x86.pl b/crypto/fipsmodule/aes/asm/aesni-x86.pl
index bb5ee94..d8fdfb8 100644
--- a/crypto/fipsmodule/aes/asm/aesni-x86.pl
+++ b/crypto/fipsmodule/aes/asm/aesni-x86.pl
@@ -2490,17 +2490,11 @@
&ret ();
&function_end_B("${PREFIX}_set_encrypt_key");
-# int $PREFIX_set_decrypt_key (const unsigned char *userKey, int bits,
-# AES_KEY *key)
-&function_begin_B("${PREFIX}_set_decrypt_key");
- &mov ("eax",&wparam(0));
- &mov ($rounds,&wparam(1));
- &mov ($key,&wparam(2));
- &call ("_aesni_set_encrypt_key");
- &mov ($key,&wparam(2));
- &shl ($rounds,4); # rounds-1 after _aesni_set_encrypt_key
- &test ("eax","eax");
- &jnz (&label("dec_key_ret"));
+# void $PREFIX_encrypt_key_to_decrypt_key (AES_KEY *key)
+&function_begin_B("${PREFIX}_encrypt_key_to_decrypt_key");
+ &mov ($key,&wparam(0));
+ &mov ($rounds,&DWP(240,$key));
+ &shl ($rounds,4);
&lea ("eax",&DWP(16,$key,$rounds)); # end of key schedule
&$movekey ("xmm0",&QWP(0,$key)); # just swap
@@ -2528,10 +2522,8 @@
&pxor ("xmm0","xmm0");
&pxor ("xmm1","xmm1");
- &xor ("eax","eax"); # return success
-&set_label("dec_key_ret");
&ret ();
-&function_end_B("${PREFIX}_set_decrypt_key");
+&function_end_B("${PREFIX}_encrypt_key_to_decrypt_key");
&set_label("key_const",64);
&data_word(0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d);
diff --git a/crypto/fipsmodule/aes/asm/aesni-x86_64.pl b/crypto/fipsmodule/aes/asm/aesni-x86_64.pl
index 17ec466..8b6036e 100644
--- a/crypto/fipsmodule/aes/asm/aesni-x86_64.pl
+++ b/crypto/fipsmodule/aes/asm/aesni-x86_64.pl
@@ -3172,69 +3172,55 @@
.size ${PREFIX}_cbc_encrypt,.-${PREFIX}_cbc_encrypt
___
}
-# int ${PREFIX}_set_decrypt_key(const unsigned char *inp,
-# int bits, AES_KEY *key)
-#
-# input: $inp user-supplied key
-# $bits $inp length in bits
-# $key pointer to key schedule
-# output: %eax 0 denoting success, -1 or -2 - failure (see C)
-# *$key key schedule
-#
-{ my ($inp,$bits,$key) = @_4args;
- $bits =~ s/%r/%e/;
+{ my ($key, $rounds, $tmp) = @_4args;
+ $rounds =~ s/%r/%e/;
+# void ${PREFIX}_encrypt_key_to_decrypt_key(AES_KEY *key)
$code.=<<___;
-.globl ${PREFIX}_set_decrypt_key
-.type ${PREFIX}_set_decrypt_key,\@abi-omnipotent
+.globl ${PREFIX}_encrypt_key_to_decrypt_key
+.type ${PREFIX}_encrypt_key_to_decrypt_key,\@abi-omnipotent
.align 16
-${PREFIX}_set_decrypt_key:
+${PREFIX}_encrypt_key_to_decrypt_key:
.cfi_startproc
-.seh_startproc
_CET_ENDBR
- sub \$8,%rsp
-.cfi_adjust_cfa_offset 8
-.seh_stackalloc 8
-.seh_endprologue
- call __aesni_set_encrypt_key
- shl \$4,$bits # rounds-1 after _aesni_set_encrypt_key
- test %eax,%eax
- jnz .Ldec_key_ret
- lea 16($key,$bits),$inp # points at the end of key schedule
+
+ mov 240($key), $rounds
+ shl \$4,$rounds
+
+ lea 16($key,$rounds),$tmp # points at the end of key schedule
$movkey ($key),%xmm0 # just swap
- $movkey ($inp),%xmm1
- $movkey %xmm0,($inp)
+ $movkey ($tmp),%xmm1
+ $movkey %xmm0,($tmp)
$movkey %xmm1,($key)
lea 16($key),$key
- lea -16($inp),$inp
+ lea -16($tmp),$tmp
.Ldec_key_inverse:
$movkey ($key),%xmm0 # swap and inverse
- $movkey ($inp),%xmm1
+ $movkey ($tmp),%xmm1
aesimc %xmm0,%xmm0
aesimc %xmm1,%xmm1
lea 16($key),$key
- lea -16($inp),$inp
- $movkey %xmm0,16($inp)
+ lea -16($tmp),$tmp
+ $movkey %xmm0,16($tmp)
$movkey %xmm1,-16($key)
- cmp $key,$inp
+ cmp $key,$tmp
ja .Ldec_key_inverse
$movkey ($key),%xmm0 # inverse middle
aesimc %xmm0,%xmm0
pxor %xmm1,%xmm1
- $movkey %xmm0,($inp)
+ $movkey %xmm0,($tmp)
pxor %xmm0,%xmm0
-.Ldec_key_ret:
- add \$8,%rsp
-.cfi_adjust_cfa_offset -8
ret
.cfi_endproc
-.seh_endproc
-.size ${PREFIX}_set_decrypt_key,.-${PREFIX}_set_decrypt_key
+.size ${PREFIX}_encrypt_key_to_decrypt_key,.-${PREFIX}_encrypt_key_to_decrypt_key
___
+}
+{ my ($inp,$bits,$key) = @_4args;
+ $bits =~ s/%r/%e/;
# This is based on submission from Intel by
# Huang Ying
# Vinodh Gopal
@@ -3264,7 +3250,6 @@
.type ${PREFIX}_set_encrypt_key,\@abi-omnipotent
.align 16
${PREFIX}_set_encrypt_key:
-__aesni_set_encrypt_key:
.cfi_startproc
.seh_startproc
_CET_ENDBR
@@ -3636,7 +3621,6 @@
xorps %xmm1,%xmm2
ret
.size ${PREFIX}_set_encrypt_key,.-${PREFIX}_set_encrypt_key
-.size __aesni_set_encrypt_key,.-__aesni_set_encrypt_key
___
}
diff --git a/crypto/fipsmodule/aes/internal.h b/crypto/fipsmodule/aes/internal.h
index 98b2a14d..e7f55d2 100644
--- a/crypto/fipsmodule/aes/internal.h
+++ b/crypto/fipsmodule/aes/internal.h
@@ -66,17 +66,21 @@
#if defined(HWAES)
-int aes_hw_set_encrypt_key(const uint8_t *user_key, const int bits,
- AES_KEY *key);
-int aes_hw_set_decrypt_key(const uint8_t *user_key, const int bits,
- AES_KEY *key);
+int aes_hw_set_encrypt_key(const uint8_t *user_key, int bits, AES_KEY *key);
+int aes_hw_set_decrypt_key(const uint8_t *user_key, int bits, AES_KEY *key);
void aes_hw_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
void aes_hw_decrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
void aes_hw_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t length,
- const AES_KEY *key, uint8_t *ivec, const int enc);
+ const AES_KEY *key, uint8_t *ivec, int enc);
void aes_hw_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, const uint8_t ivec[16]);
+#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
+// On x86 and x86_64, |aes_hw_set_decrypt_key| is implemented in terms of
+// |aes_hw_set_encrypt_key| and a conversion function.
+void aes_hw_encrypt_key_to_decrypt_key(AES_KEY *key);
+#endif
+
#else
// If HWAES isn't defined then we provide dummy functions for each of the hwaes
@@ -120,7 +124,7 @@
#if defined(HWAES_ECB)
void aes_hw_ecb_encrypt(const uint8_t *in, uint8_t *out, size_t length,
- const AES_KEY *key, const int enc);
+ const AES_KEY *key, int enc);
#endif // HWAES_ECB
@@ -218,7 +222,7 @@
size_t blocks, const AES_KEY *key,
const uint8_t ivec[16]);
void aes_nohw_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t len,
- const AES_KEY *key, uint8_t *ivec, const int enc);
+ const AES_KEY *key, uint8_t *ivec, int enc);
#if defined(__cplusplus)
diff --git a/gen/bcm/aesni-x86-apple.S b/gen/bcm/aesni-x86-apple.S
index cee5724..e64b4bb 100644
--- a/gen/bcm/aesni-x86-apple.S
+++ b/gen/bcm/aesni-x86-apple.S
@@ -2409,19 +2409,14 @@
movl 12(%esp),%edx
call __aesni_set_encrypt_key
ret
-.globl _aes_hw_set_decrypt_key
-.private_extern _aes_hw_set_decrypt_key
+.globl _aes_hw_encrypt_key_to_decrypt_key
+.private_extern _aes_hw_encrypt_key_to_decrypt_key
.align 4
-_aes_hw_set_decrypt_key:
-L_aes_hw_set_decrypt_key_begin:
- movl 4(%esp),%eax
- movl 8(%esp),%ecx
- movl 12(%esp),%edx
- call __aesni_set_encrypt_key
- movl 12(%esp),%edx
+_aes_hw_encrypt_key_to_decrypt_key:
+L_aes_hw_encrypt_key_to_decrypt_key_begin:
+ movl 4(%esp),%edx
+ movl 240(%edx),%ecx
shll $4,%ecx
- testl %eax,%eax
- jnz L116dec_key_ret
leal 16(%edx,%ecx,1),%eax
movups (%edx),%xmm0
movups (%eax),%xmm1
@@ -2429,7 +2424,7 @@
movups %xmm1,(%edx)
leal 16(%edx),%edx
leal -16(%eax),%eax
-L117dec_key_inverse:
+L116dec_key_inverse:
movups (%edx),%xmm0
movups (%eax),%xmm1
.byte 102,15,56,219,192
@@ -2439,14 +2434,12 @@
movups %xmm0,16(%eax)
movups %xmm1,-16(%edx)
cmpl %edx,%eax
- ja L117dec_key_inverse
+ ja L116dec_key_inverse
movups (%edx),%xmm0
.byte 102,15,56,219,192
movups %xmm0,(%edx)
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
- xorl %eax,%eax
-L116dec_key_ret:
ret
.align 6,0x90
Lkey_const:
diff --git a/gen/bcm/aesni-x86-linux.S b/gen/bcm/aesni-x86-linux.S
index dbcded2..1f15c71 100644
--- a/gen/bcm/aesni-x86-linux.S
+++ b/gen/bcm/aesni-x86-linux.S
@@ -2447,20 +2447,15 @@
call _aesni_set_encrypt_key
ret
.size aes_hw_set_encrypt_key,.-.L_aes_hw_set_encrypt_key_begin
-.globl aes_hw_set_decrypt_key
-.hidden aes_hw_set_decrypt_key
-.type aes_hw_set_decrypt_key,@function
+.globl aes_hw_encrypt_key_to_decrypt_key
+.hidden aes_hw_encrypt_key_to_decrypt_key
+.type aes_hw_encrypt_key_to_decrypt_key,@function
.align 16
-aes_hw_set_decrypt_key:
-.L_aes_hw_set_decrypt_key_begin:
- movl 4(%esp),%eax
- movl 8(%esp),%ecx
- movl 12(%esp),%edx
- call _aesni_set_encrypt_key
- movl 12(%esp),%edx
+aes_hw_encrypt_key_to_decrypt_key:
+.L_aes_hw_encrypt_key_to_decrypt_key_begin:
+ movl 4(%esp),%edx
+ movl 240(%edx),%ecx
shll $4,%ecx
- testl %eax,%eax
- jnz .L116dec_key_ret
leal 16(%edx,%ecx,1),%eax
movups (%edx),%xmm0
movups (%eax),%xmm1
@@ -2468,7 +2463,7 @@
movups %xmm1,(%edx)
leal 16(%edx),%edx
leal -16(%eax),%eax
-.L117dec_key_inverse:
+.L116dec_key_inverse:
movups (%edx),%xmm0
movups (%eax),%xmm1
.byte 102,15,56,219,192
@@ -2478,16 +2473,14 @@
movups %xmm0,16(%eax)
movups %xmm1,-16(%edx)
cmpl %edx,%eax
- ja .L117dec_key_inverse
+ ja .L116dec_key_inverse
movups (%edx),%xmm0
.byte 102,15,56,219,192
movups %xmm0,(%edx)
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
- xorl %eax,%eax
-.L116dec_key_ret:
ret
-.size aes_hw_set_decrypt_key,.-.L_aes_hw_set_decrypt_key_begin
+.size aes_hw_encrypt_key_to_decrypt_key,.-.L_aes_hw_encrypt_key_to_decrypt_key_begin
.align 64
.Lkey_const:
.long 202313229,202313229,202313229,202313229
diff --git a/gen/bcm/aesni-x86-win.asm b/gen/bcm/aesni-x86-win.asm
index ce95d23..2d32e77 100644
--- a/gen/bcm/aesni-x86-win.asm
+++ b/gen/bcm/aesni-x86-win.asm
@@ -2400,18 +2400,13 @@
mov edx,DWORD [12+esp]
call __aesni_set_encrypt_key
ret
-global _aes_hw_set_decrypt_key
+global _aes_hw_encrypt_key_to_decrypt_key
align 16
-_aes_hw_set_decrypt_key:
-L$_aes_hw_set_decrypt_key_begin:
- mov eax,DWORD [4+esp]
- mov ecx,DWORD [8+esp]
- mov edx,DWORD [12+esp]
- call __aesni_set_encrypt_key
- mov edx,DWORD [12+esp]
+_aes_hw_encrypt_key_to_decrypt_key:
+L$_aes_hw_encrypt_key_to_decrypt_key_begin:
+ mov edx,DWORD [4+esp]
+ mov ecx,DWORD [240+edx]
shl ecx,4
- test eax,eax
- jnz NEAR L$116dec_key_ret
lea eax,[16+ecx*1+edx]
movups xmm0,[edx]
movups xmm1,[eax]
@@ -2419,7 +2414,7 @@
movups [edx],xmm1
lea edx,[16+edx]
lea eax,[eax-16]
-L$117dec_key_inverse:
+L$116dec_key_inverse:
movups xmm0,[edx]
movups xmm1,[eax]
db 102,15,56,219,192
@@ -2429,14 +2424,12 @@
movups [16+eax],xmm0
movups [edx-16],xmm1
cmp eax,edx
- ja NEAR L$117dec_key_inverse
+ ja NEAR L$116dec_key_inverse
movups xmm0,[edx]
db 102,15,56,219,192
movups [edx],xmm0
pxor xmm0,xmm0
pxor xmm1,xmm1
- xor eax,eax
-L$116dec_key_ret:
ret
align 64
L$key_const:
diff --git a/gen/bcm/aesni-x86_64-apple.S b/gen/bcm/aesni-x86_64-apple.S
index 48d3cfc..ccf9f8f 100644
--- a/gen/bcm/aesni-x86_64-apple.S
+++ b/gen/bcm/aesni-x86_64-apple.S
@@ -1905,61 +1905,51 @@
ret
-.globl _aes_hw_set_decrypt_key
-.private_extern _aes_hw_set_decrypt_key
+.globl _aes_hw_encrypt_key_to_decrypt_key
+.private_extern _aes_hw_encrypt_key_to_decrypt_key
.p2align 4
-_aes_hw_set_decrypt_key:
-
+_aes_hw_encrypt_key_to_decrypt_key:
_CET_ENDBR
- subq $8,%rsp
-
-
- call __aesni_set_encrypt_key
+ movl 240(%rdi),%esi
shll $4,%esi
- testl %eax,%eax
- jnz L$dec_key_ret
- leaq 16(%rdx,%rsi,1),%rdi
- movups (%rdx),%xmm0
- movups (%rdi),%xmm1
- movups %xmm0,(%rdi)
- movups %xmm1,(%rdx)
- leaq 16(%rdx),%rdx
- leaq -16(%rdi),%rdi
+ leaq 16(%rdi,%rsi,1),%rdx
+
+ movups (%rdi),%xmm0
+ movups (%rdx),%xmm1
+ movups %xmm0,(%rdx)
+ movups %xmm1,(%rdi)
+ leaq 16(%rdi),%rdi
+ leaq -16(%rdx),%rdx
L$dec_key_inverse:
- movups (%rdx),%xmm0
- movups (%rdi),%xmm1
+ movups (%rdi),%xmm0
+ movups (%rdx),%xmm1
.byte 102,15,56,219,192
.byte 102,15,56,219,201
- leaq 16(%rdx),%rdx
- leaq -16(%rdi),%rdi
- movups %xmm0,16(%rdi)
- movups %xmm1,-16(%rdx)
- cmpq %rdx,%rdi
+ leaq 16(%rdi),%rdi
+ leaq -16(%rdx),%rdx
+ movups %xmm0,16(%rdx)
+ movups %xmm1,-16(%rdi)
+ cmpq %rdi,%rdx
ja L$dec_key_inverse
- movups (%rdx),%xmm0
+ movups (%rdi),%xmm0
.byte 102,15,56,219,192
pxor %xmm1,%xmm1
- movups %xmm0,(%rdi)
+ movups %xmm0,(%rdx)
pxor %xmm0,%xmm0
-L$dec_key_ret:
- addq $8,%rsp
-
ret
-
.globl _aes_hw_set_encrypt_key
.private_extern _aes_hw_set_encrypt_key
.p2align 4
_aes_hw_set_encrypt_key:
-__aesni_set_encrypt_key:
_CET_ENDBR
@@ -2331,7 +2321,6 @@
xorps %xmm1,%xmm2
ret
-
.section __DATA,__const
.p2align 6
L$bswap_mask:
diff --git a/gen/bcm/aesni-x86_64-linux.S b/gen/bcm/aesni-x86_64-linux.S
index fdbb28e..38ed6e7 100644
--- a/gen/bcm/aesni-x86_64-linux.S
+++ b/gen/bcm/aesni-x86_64-linux.S
@@ -1907,61 +1907,51 @@
ret
.cfi_endproc
.size aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
-.globl aes_hw_set_decrypt_key
-.hidden aes_hw_set_decrypt_key
-.type aes_hw_set_decrypt_key,@function
+.globl aes_hw_encrypt_key_to_decrypt_key
+.hidden aes_hw_encrypt_key_to_decrypt_key
+.type aes_hw_encrypt_key_to_decrypt_key,@function
.align 16
-aes_hw_set_decrypt_key:
+aes_hw_encrypt_key_to_decrypt_key:
.cfi_startproc
-
_CET_ENDBR
- subq $8,%rsp
-.cfi_adjust_cfa_offset 8
-
- call __aesni_set_encrypt_key
+ movl 240(%rdi),%esi
shll $4,%esi
- testl %eax,%eax
- jnz .Ldec_key_ret
- leaq 16(%rdx,%rsi,1),%rdi
- movups (%rdx),%xmm0
- movups (%rdi),%xmm1
- movups %xmm0,(%rdi)
- movups %xmm1,(%rdx)
- leaq 16(%rdx),%rdx
- leaq -16(%rdi),%rdi
+ leaq 16(%rdi,%rsi,1),%rdx
+
+ movups (%rdi),%xmm0
+ movups (%rdx),%xmm1
+ movups %xmm0,(%rdx)
+ movups %xmm1,(%rdi)
+ leaq 16(%rdi),%rdi
+ leaq -16(%rdx),%rdx
.Ldec_key_inverse:
- movups (%rdx),%xmm0
- movups (%rdi),%xmm1
+ movups (%rdi),%xmm0
+ movups (%rdx),%xmm1
.byte 102,15,56,219,192
.byte 102,15,56,219,201
- leaq 16(%rdx),%rdx
- leaq -16(%rdi),%rdi
- movups %xmm0,16(%rdi)
- movups %xmm1,-16(%rdx)
- cmpq %rdx,%rdi
+ leaq 16(%rdi),%rdi
+ leaq -16(%rdx),%rdx
+ movups %xmm0,16(%rdx)
+ movups %xmm1,-16(%rdi)
+ cmpq %rdi,%rdx
ja .Ldec_key_inverse
- movups (%rdx),%xmm0
+ movups (%rdi),%xmm0
.byte 102,15,56,219,192
pxor %xmm1,%xmm1
- movups %xmm0,(%rdi)
+ movups %xmm0,(%rdx)
pxor %xmm0,%xmm0
-.Ldec_key_ret:
- addq $8,%rsp
-.cfi_adjust_cfa_offset -8
ret
.cfi_endproc
-
-.size aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
+.size aes_hw_encrypt_key_to_decrypt_key,.-aes_hw_encrypt_key_to_decrypt_key
.globl aes_hw_set_encrypt_key
.hidden aes_hw_set_encrypt_key
.type aes_hw_set_encrypt_key,@function
.align 16
aes_hw_set_encrypt_key:
-__aesni_set_encrypt_key:
.cfi_startproc
_CET_ENDBR
@@ -2333,7 +2323,6 @@
xorps %xmm1,%xmm2
ret
.size aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
-.size __aesni_set_encrypt_key,.-__aesni_set_encrypt_key
.section .rodata
.align 64
.Lbswap_mask:
diff --git a/gen/bcm/aesni-x86_64-win.asm b/gen/bcm/aesni-x86_64-win.asm
index 6a14422..64dd6be 100644
--- a/gen/bcm/aesni-x86_64-win.asm
+++ b/gen/bcm/aesni-x86_64-win.asm
@@ -2012,59 +2012,49 @@
ret
$L$SEH_end_aes_hw_cbc_encrypt:
-global aes_hw_set_decrypt_key
+global aes_hw_encrypt_key_to_decrypt_key
ALIGN 16
-aes_hw_set_decrypt_key:
+aes_hw_encrypt_key_to_decrypt_key:
-$L$SEH_begin_aes_hw_set_decrypt_key_1:
_CET_ENDBR
- sub rsp,8
-$L$SEH_prologue_aes_hw_set_decrypt_key_2:
-$L$SEH_endprologue_aes_hw_set_decrypt_key_3:
- call __aesni_set_encrypt_key
+ mov edx,DWORD[240+rcx]
shl edx,4
- test eax,eax
- jnz NEAR $L$dec_key_ret
- lea rcx,[16+rdx*1+r8]
- movups xmm0,XMMWORD[r8]
- movups xmm1,XMMWORD[rcx]
- movups XMMWORD[rcx],xmm0
- movups XMMWORD[r8],xmm1
- lea r8,[16+r8]
- lea rcx,[((-16))+rcx]
+ lea r8,[16+rdx*1+rcx]
+
+ movups xmm0,XMMWORD[rcx]
+ movups xmm1,XMMWORD[r8]
+ movups XMMWORD[r8],xmm0
+ movups XMMWORD[rcx],xmm1
+ lea rcx,[16+rcx]
+ lea r8,[((-16))+r8]
$L$dec_key_inverse:
- movups xmm0,XMMWORD[r8]
- movups xmm1,XMMWORD[rcx]
+ movups xmm0,XMMWORD[rcx]
+ movups xmm1,XMMWORD[r8]
DB 102,15,56,219,192
DB 102,15,56,219,201
- lea r8,[16+r8]
- lea rcx,[((-16))+rcx]
- movups XMMWORD[16+rcx],xmm0
- movups XMMWORD[(-16)+r8],xmm1
- cmp rcx,r8
+ lea rcx,[16+rcx]
+ lea r8,[((-16))+r8]
+ movups XMMWORD[16+r8],xmm0
+ movups XMMWORD[(-16)+rcx],xmm1
+ cmp r8,rcx
ja NEAR $L$dec_key_inverse
- movups xmm0,XMMWORD[r8]
+ movups xmm0,XMMWORD[rcx]
DB 102,15,56,219,192
pxor xmm1,xmm1
- movups XMMWORD[rcx],xmm0
+ movups XMMWORD[r8],xmm0
pxor xmm0,xmm0
-$L$dec_key_ret:
- add rsp,8
-
ret
-$L$SEH_end_aes_hw_set_decrypt_key_4:
global aes_hw_set_encrypt_key
ALIGN 16
aes_hw_set_encrypt_key:
-__aesni_set_encrypt_key:
$L$SEH_begin_aes_hw_set_encrypt_key_1:
_CET_ENDBR
@@ -2436,7 +2426,6 @@
xorps xmm2,xmm1
ret
-
section .rdata rdata align=8
ALIGN 64
$L$bswap_mask:
@@ -2661,10 +2650,6 @@
DD cbc_se_handler wrt ..imagebase
section .pdata
ALIGN 4
- DD $L$SEH_begin_aes_hw_set_decrypt_key_1 wrt ..imagebase
- DD $L$SEH_end_aes_hw_set_decrypt_key_4 wrt ..imagebase
- DD $L$SEH_info_aes_hw_set_decrypt_key_0 wrt ..imagebase
-
DD $L$SEH_begin_aes_hw_set_encrypt_key_1 wrt ..imagebase
DD $L$SEH_end_aes_hw_set_encrypt_key_4 wrt ..imagebase
DD $L$SEH_info_aes_hw_set_encrypt_key_0 wrt ..imagebase
@@ -2672,15 +2657,6 @@
section .xdata
ALIGN 4
-$L$SEH_info_aes_hw_set_decrypt_key_0:
- DB 1
- DB $L$SEH_endprologue_aes_hw_set_decrypt_key_3-$L$SEH_begin_aes_hw_set_decrypt_key_1
- DB 1
- DB 0
- DB $L$SEH_prologue_aes_hw_set_decrypt_key_2-$L$SEH_begin_aes_hw_set_decrypt_key_1
- DB 2
-
- DW 0
$L$SEH_info_aes_hw_set_encrypt_key_0:
DB 1
DB $L$SEH_endprologue_aes_hw_set_encrypt_key_3-$L$SEH_begin_aes_hw_set_encrypt_key_1