Use vmovdqa to save/restore xmm registers in AES-GCM code on Windows
The Windows xmm register save/restore code generated by
aes-gcm-avx2-x86_64.pl and aes-gcm-avx10-x86_64.pl used movdqa, which is
a legacy SSE instruction. This was functionally correct, but it was the
only use of legacy SSE instructions in these files. Since these files
contain AVX code, use the VEX-encoded forms of these instructions
instead. The VEX-encoded forms are no longer than the legacy ones (in
fact, they are one byte shorter for xmm8 and higher), and their
performance does not depend on whether other code has executed
vzeroupper.
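For illustration, the encodings below compare the legacy and VEX forms
of two of the stores changed in this patch. The byte sequences are a
sketch worked out from the standard encoding rules rather than taken
from an assembler listing, so treat them as indicative only:

    movdqa  XMMWORD[rsp],xmm6     ; 66 0F 7F 34 24       - 5 bytes (legacy SSE)
    vmovdqa XMMWORD[rsp],xmm6     ; C5 F9 7F 34 24       - 5 bytes (2-byte VEX)
    movdqa  XMMWORD[32+rsp],xmm8  ; 66 44 0F 7F 44 24 20 - 7 bytes (REX.R needed)
    vmovdqa XMMWORD[32+rsp],xmm8  ; C5 79 7F 44 24 20    - 6 bytes (R bit in VEX)

For xmm0-xmm7 the two forms are the same length; for xmm8 and higher the
VEX form absorbs the REX.R bit into the VEX prefix and saves a byte.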
Change-Id: Ib41ae1097d30d88dfcd4c68c0e850104034a5646
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/77228
Reviewed-by: David Benjamin <davidben@google.com>
Commit-Queue: David Benjamin <davidben@google.com>
diff --git a/crypto/fipsmodule/aes/asm/aes-gcm-avx10-x86_64.pl b/crypto/fipsmodule/aes/asm/aes-gcm-avx10-x86_64.pl
index 2f35323..6dc3da0 100644
--- a/crypto/fipsmodule/aes/asm/aes-gcm-avx10-x86_64.pl
+++ b/crypto/fipsmodule/aes/asm/aes-gcm-avx10-x86_64.pl
@@ -159,7 +159,7 @@
for my $i ( 0 .. $num_xmmregs - 1 ) {
my $reg_num = $xmmregs[$i];
my $pos = 16 * $i;
- $code .= "movdqa %xmm$reg_num, $pos(%rsp)\n";
+ $code .= "vmovdqa %xmm$reg_num, $pos(%rsp)\n";
$code .= ".seh_savexmm %xmm$reg_num, $pos\n";
}
}
@@ -177,7 +177,7 @@
for my $i ( 0 .. $num_xmmregs - 1 ) {
my $reg_num = $g_cur_func_saved_xmmregs[$i];
my $pos = 16 * $i;
- $code .= "movdqa $pos(%rsp), %xmm$reg_num\n";
+ $code .= "vmovdqa $pos(%rsp), %xmm$reg_num\n";
}
$code .= "add \$$alloc_size, %rsp\n";
}
diff --git a/crypto/fipsmodule/aes/asm/aes-gcm-avx2-x86_64.pl b/crypto/fipsmodule/aes/asm/aes-gcm-avx2-x86_64.pl
index deec309..3a1832c 100644
--- a/crypto/fipsmodule/aes/asm/aes-gcm-avx2-x86_64.pl
+++ b/crypto/fipsmodule/aes/asm/aes-gcm-avx2-x86_64.pl
@@ -121,7 +121,7 @@
for my $i ( 0 .. $num_xmmregs - 1 ) {
my $reg_num = $xmmregs[$i];
my $pos = 16 * $i;
- $code .= "movdqa %xmm$reg_num, $pos(%rsp)\n";
+ $code .= "vmovdqa %xmm$reg_num, $pos(%rsp)\n";
$code .= ".seh_savexmm %xmm$reg_num, $pos\n";
}
}
@@ -139,7 +139,7 @@
for my $i ( 0 .. $num_xmmregs - 1 ) {
my $reg_num = $g_cur_func_saved_xmmregs[$i];
my $pos = 16 * $i;
- $code .= "movdqa $pos(%rsp), %xmm$reg_num\n";
+ $code .= "vmovdqa $pos(%rsp), %xmm$reg_num\n";
}
$code .= "add \$$alloc_size, %rsp\n";
}
diff --git a/gen/bcm/aes-gcm-avx10-x86_64-win.asm b/gen/bcm/aes-gcm-avx10-x86_64-win.asm
index f2073e7..8beb10e 100644
--- a/gen/bcm/aes-gcm-avx10-x86_64-win.asm
+++ b/gen/bcm/aes-gcm-avx10-x86_64-win.asm
@@ -56,7 +56,7 @@
_CET_ENDBR
sub rsp,24
$L$SEH_prologue_gcm_gmult_vpclmulqdq_avx10_2:
- movdqa XMMWORD[rsp],xmm6
+ vmovdqa XMMWORD[rsp],xmm6
$L$SEH_prologue_gcm_gmult_vpclmulqdq_avx10_3:
$L$SEH_endprologue_gcm_gmult_vpclmulqdq_avx10_4:
@@ -82,7 +82,7 @@
vpshufb xmm0,xmm0,xmm1
vmovdqu XMMWORD[rcx],xmm0
- movdqa xmm6,XMMWORD[rsp]
+ vmovdqa xmm6,XMMWORD[rsp]
add rsp,24
ret
$L$SEH_end_gcm_gmult_vpclmulqdq_avx10_5:
@@ -203,21 +203,21 @@
_CET_ENDBR
sub rsp,136
$L$SEH_prologue_gcm_ghash_vpclmulqdq_avx10_512_2:
- movdqa XMMWORD[rsp],xmm6
+ vmovdqa XMMWORD[rsp],xmm6
$L$SEH_prologue_gcm_ghash_vpclmulqdq_avx10_512_3:
- movdqa XMMWORD[16+rsp],xmm7
+ vmovdqa XMMWORD[16+rsp],xmm7
$L$SEH_prologue_gcm_ghash_vpclmulqdq_avx10_512_4:
- movdqa XMMWORD[32+rsp],xmm8
+ vmovdqa XMMWORD[32+rsp],xmm8
$L$SEH_prologue_gcm_ghash_vpclmulqdq_avx10_512_5:
- movdqa XMMWORD[48+rsp],xmm9
+ vmovdqa XMMWORD[48+rsp],xmm9
$L$SEH_prologue_gcm_ghash_vpclmulqdq_avx10_512_6:
- movdqa XMMWORD[64+rsp],xmm10
+ vmovdqa XMMWORD[64+rsp],xmm10
$L$SEH_prologue_gcm_ghash_vpclmulqdq_avx10_512_7:
- movdqa XMMWORD[80+rsp],xmm11
+ vmovdqa XMMWORD[80+rsp],xmm11
$L$SEH_prologue_gcm_ghash_vpclmulqdq_avx10_512_8:
- movdqa XMMWORD[96+rsp],xmm12
+ vmovdqa XMMWORD[96+rsp],xmm12
$L$SEH_prologue_gcm_ghash_vpclmulqdq_avx10_512_9:
- movdqa XMMWORD[112+rsp],xmm13
+ vmovdqa XMMWORD[112+rsp],xmm13
$L$SEH_prologue_gcm_ghash_vpclmulqdq_avx10_512_10:
$L$SEH_endprologue_gcm_ghash_vpclmulqdq_avx10_512_11:
@@ -367,14 +367,14 @@
vmovdqu XMMWORD[rcx],xmm5
vzeroupper
- movdqa xmm6,XMMWORD[rsp]
- movdqa xmm7,XMMWORD[16+rsp]
- movdqa xmm8,XMMWORD[32+rsp]
- movdqa xmm9,XMMWORD[48+rsp]
- movdqa xmm10,XMMWORD[64+rsp]
- movdqa xmm11,XMMWORD[80+rsp]
- movdqa xmm12,XMMWORD[96+rsp]
- movdqa xmm13,XMMWORD[112+rsp]
+ vmovdqa xmm6,XMMWORD[rsp]
+ vmovdqa xmm7,XMMWORD[16+rsp]
+ vmovdqa xmm8,XMMWORD[32+rsp]
+ vmovdqa xmm9,XMMWORD[48+rsp]
+ vmovdqa xmm10,XMMWORD[64+rsp]
+ vmovdqa xmm11,XMMWORD[80+rsp]
+ vmovdqa xmm12,XMMWORD[96+rsp]
+ vmovdqa xmm13,XMMWORD[112+rsp]
add rsp,136
ret
$L$SEH_end_gcm_ghash_vpclmulqdq_avx10_512_12:
@@ -399,25 +399,25 @@
mov r12,QWORD[80+rsp]
sub rsp,160
$L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_512_5:
- movdqa XMMWORD[rsp],xmm6
+ vmovdqa XMMWORD[rsp],xmm6
$L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_512_6:
- movdqa XMMWORD[16+rsp],xmm7
+ vmovdqa XMMWORD[16+rsp],xmm7
$L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_512_7:
- movdqa XMMWORD[32+rsp],xmm8
+ vmovdqa XMMWORD[32+rsp],xmm8
$L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_512_8:
- movdqa XMMWORD[48+rsp],xmm9
+ vmovdqa XMMWORD[48+rsp],xmm9
$L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_512_9:
- movdqa XMMWORD[64+rsp],xmm10
+ vmovdqa XMMWORD[64+rsp],xmm10
$L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_512_10:
- movdqa XMMWORD[80+rsp],xmm11
+ vmovdqa XMMWORD[80+rsp],xmm11
$L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_512_11:
- movdqa XMMWORD[96+rsp],xmm12
+ vmovdqa XMMWORD[96+rsp],xmm12
$L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_512_12:
- movdqa XMMWORD[112+rsp],xmm13
+ vmovdqa XMMWORD[112+rsp],xmm13
$L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_512_13:
- movdqa XMMWORD[128+rsp],xmm14
+ vmovdqa XMMWORD[128+rsp],xmm14
$L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_512_14:
- movdqa XMMWORD[144+rsp],xmm15
+ vmovdqa XMMWORD[144+rsp],xmm15
$L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_512_15:
$L$SEH_endprologue_aes_gcm_enc_update_vaes_avx10_512_16:
@@ -894,16 +894,16 @@
vmovdqu XMMWORD[r12],xmm10
vzeroupper
- movdqa xmm6,XMMWORD[rsp]
- movdqa xmm7,XMMWORD[16+rsp]
- movdqa xmm8,XMMWORD[32+rsp]
- movdqa xmm9,XMMWORD[48+rsp]
- movdqa xmm10,XMMWORD[64+rsp]
- movdqa xmm11,XMMWORD[80+rsp]
- movdqa xmm12,XMMWORD[96+rsp]
- movdqa xmm13,XMMWORD[112+rsp]
- movdqa xmm14,XMMWORD[128+rsp]
- movdqa xmm15,XMMWORD[144+rsp]
+ vmovdqa xmm6,XMMWORD[rsp]
+ vmovdqa xmm7,XMMWORD[16+rsp]
+ vmovdqa xmm8,XMMWORD[32+rsp]
+ vmovdqa xmm9,XMMWORD[48+rsp]
+ vmovdqa xmm10,XMMWORD[64+rsp]
+ vmovdqa xmm11,XMMWORD[80+rsp]
+ vmovdqa xmm12,XMMWORD[96+rsp]
+ vmovdqa xmm13,XMMWORD[112+rsp]
+ vmovdqa xmm14,XMMWORD[128+rsp]
+ vmovdqa xmm15,XMMWORD[144+rsp]
add rsp,160
pop r12
pop rdi
@@ -931,25 +931,25 @@
mov r12,QWORD[80+rsp]
sub rsp,160
$L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_512_5:
- movdqa XMMWORD[rsp],xmm6
+ vmovdqa XMMWORD[rsp],xmm6
$L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_512_6:
- movdqa XMMWORD[16+rsp],xmm7
+ vmovdqa XMMWORD[16+rsp],xmm7
$L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_512_7:
- movdqa XMMWORD[32+rsp],xmm8
+ vmovdqa XMMWORD[32+rsp],xmm8
$L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_512_8:
- movdqa XMMWORD[48+rsp],xmm9
+ vmovdqa XMMWORD[48+rsp],xmm9
$L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_512_9:
- movdqa XMMWORD[64+rsp],xmm10
+ vmovdqa XMMWORD[64+rsp],xmm10
$L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_512_10:
- movdqa XMMWORD[80+rsp],xmm11
+ vmovdqa XMMWORD[80+rsp],xmm11
$L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_512_11:
- movdqa XMMWORD[96+rsp],xmm12
+ vmovdqa XMMWORD[96+rsp],xmm12
$L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_512_12:
- movdqa XMMWORD[112+rsp],xmm13
+ vmovdqa XMMWORD[112+rsp],xmm13
$L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_512_13:
- movdqa XMMWORD[128+rsp],xmm14
+ vmovdqa XMMWORD[128+rsp],xmm14
$L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_512_14:
- movdqa XMMWORD[144+rsp],xmm15
+ vmovdqa XMMWORD[144+rsp],xmm15
$L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_512_15:
$L$SEH_endprologue_aes_gcm_dec_update_vaes_avx10_512_16:
@@ -1336,16 +1336,16 @@
vmovdqu XMMWORD[r12],xmm10
vzeroupper
- movdqa xmm6,XMMWORD[rsp]
- movdqa xmm7,XMMWORD[16+rsp]
- movdqa xmm8,XMMWORD[32+rsp]
- movdqa xmm9,XMMWORD[48+rsp]
- movdqa xmm10,XMMWORD[64+rsp]
- movdqa xmm11,XMMWORD[80+rsp]
- movdqa xmm12,XMMWORD[96+rsp]
- movdqa xmm13,XMMWORD[112+rsp]
- movdqa xmm14,XMMWORD[128+rsp]
- movdqa xmm15,XMMWORD[144+rsp]
+ vmovdqa xmm6,XMMWORD[rsp]
+ vmovdqa xmm7,XMMWORD[16+rsp]
+ vmovdqa xmm8,XMMWORD[32+rsp]
+ vmovdqa xmm9,XMMWORD[48+rsp]
+ vmovdqa xmm10,XMMWORD[64+rsp]
+ vmovdqa xmm11,XMMWORD[80+rsp]
+ vmovdqa xmm12,XMMWORD[96+rsp]
+ vmovdqa xmm13,XMMWORD[112+rsp]
+ vmovdqa xmm14,XMMWORD[128+rsp]
+ vmovdqa xmm15,XMMWORD[144+rsp]
add rsp,160
pop r12
pop rdi
diff --git a/gen/bcm/aes-gcm-avx2-x86_64-win.asm b/gen/bcm/aes-gcm-avx2-x86_64-win.asm
index aec14b3..638348e 100644
--- a/gen/bcm/aes-gcm-avx2-x86_64-win.asm
+++ b/gen/bcm/aes-gcm-avx2-x86_64-win.asm
@@ -52,7 +52,7 @@
_CET_ENDBR
sub rsp,24
$L$SEH_prologue_gcm_init_vpclmulqdq_avx2_2:
- movdqa XMMWORD[rsp],xmm6
+ vmovdqa XMMWORD[rsp],xmm6
$L$SEH_prologue_gcm_init_vpclmulqdq_avx2_3:
$L$SEH_endprologue_gcm_init_vpclmulqdq_avx2_4:
@@ -160,7 +160,7 @@
vmovdqu YMMWORD[128+rcx],ymm0
vzeroupper
- movdqa xmm6,XMMWORD[rsp]
+ vmovdqa xmm6,XMMWORD[rsp]
add rsp,24
ret
$L$SEH_end_gcm_init_vpclmulqdq_avx2_5:
@@ -175,7 +175,7 @@
_CET_ENDBR
sub rsp,24
$L$SEH_prologue_gcm_gmult_vpclmulqdq_avx2_2:
- movdqa XMMWORD[rsp],xmm6
+ vmovdqa XMMWORD[rsp],xmm6
$L$SEH_prologue_gcm_gmult_vpclmulqdq_avx2_3:
$L$SEH_endprologue_gcm_gmult_vpclmulqdq_avx2_4:
@@ -203,7 +203,7 @@
vpshufb xmm0,xmm0,xmm1
vmovdqu XMMWORD[rcx],xmm0
- movdqa xmm6,XMMWORD[rsp]
+ vmovdqa xmm6,XMMWORD[rsp]
add rsp,24
ret
$L$SEH_end_gcm_gmult_vpclmulqdq_avx2_5:
@@ -218,13 +218,13 @@
_CET_ENDBR
sub rsp,72
$L$SEH_prologue_gcm_ghash_vpclmulqdq_avx2_2:
- movdqa XMMWORD[rsp],xmm6
+ vmovdqa XMMWORD[rsp],xmm6
$L$SEH_prologue_gcm_ghash_vpclmulqdq_avx2_3:
- movdqa XMMWORD[16+rsp],xmm7
+ vmovdqa XMMWORD[16+rsp],xmm7
$L$SEH_prologue_gcm_ghash_vpclmulqdq_avx2_4:
- movdqa XMMWORD[32+rsp],xmm8
+ vmovdqa XMMWORD[32+rsp],xmm8
$L$SEH_prologue_gcm_ghash_vpclmulqdq_avx2_5:
- movdqa XMMWORD[48+rsp],xmm9
+ vmovdqa XMMWORD[48+rsp],xmm9
$L$SEH_prologue_gcm_ghash_vpclmulqdq_avx2_6:
$L$SEH_endprologue_gcm_ghash_vpclmulqdq_avx2_7:
@@ -384,10 +384,10 @@
vmovdqu XMMWORD[rcx],xmm5
vzeroupper
- movdqa xmm6,XMMWORD[rsp]
- movdqa xmm7,XMMWORD[16+rsp]
- movdqa xmm8,XMMWORD[32+rsp]
- movdqa xmm9,XMMWORD[48+rsp]
+ vmovdqa xmm6,XMMWORD[rsp]
+ vmovdqa xmm7,XMMWORD[16+rsp]
+ vmovdqa xmm8,XMMWORD[32+rsp]
+ vmovdqa xmm9,XMMWORD[48+rsp]
add rsp,72
ret
$L$SEH_end_gcm_ghash_vpclmulqdq_avx2_8:
@@ -412,25 +412,25 @@
mov r12,QWORD[80+rsp]
sub rsp,160
$L$SEH_prologue_aes_gcm_enc_update_vaes_avx2_5:
- movdqa XMMWORD[rsp],xmm6
+ vmovdqa XMMWORD[rsp],xmm6
$L$SEH_prologue_aes_gcm_enc_update_vaes_avx2_6:
- movdqa XMMWORD[16+rsp],xmm7
+ vmovdqa XMMWORD[16+rsp],xmm7
$L$SEH_prologue_aes_gcm_enc_update_vaes_avx2_7:
- movdqa XMMWORD[32+rsp],xmm8
+ vmovdqa XMMWORD[32+rsp],xmm8
$L$SEH_prologue_aes_gcm_enc_update_vaes_avx2_8:
- movdqa XMMWORD[48+rsp],xmm9
+ vmovdqa XMMWORD[48+rsp],xmm9
$L$SEH_prologue_aes_gcm_enc_update_vaes_avx2_9:
- movdqa XMMWORD[64+rsp],xmm10
+ vmovdqa XMMWORD[64+rsp],xmm10
$L$SEH_prologue_aes_gcm_enc_update_vaes_avx2_10:
- movdqa XMMWORD[80+rsp],xmm11
+ vmovdqa XMMWORD[80+rsp],xmm11
$L$SEH_prologue_aes_gcm_enc_update_vaes_avx2_11:
- movdqa XMMWORD[96+rsp],xmm12
+ vmovdqa XMMWORD[96+rsp],xmm12
$L$SEH_prologue_aes_gcm_enc_update_vaes_avx2_12:
- movdqa XMMWORD[112+rsp],xmm13
+ vmovdqa XMMWORD[112+rsp],xmm13
$L$SEH_prologue_aes_gcm_enc_update_vaes_avx2_13:
- movdqa XMMWORD[128+rsp],xmm14
+ vmovdqa XMMWORD[128+rsp],xmm14
$L$SEH_prologue_aes_gcm_enc_update_vaes_avx2_14:
- movdqa XMMWORD[144+rsp],xmm15
+ vmovdqa XMMWORD[144+rsp],xmm15
$L$SEH_prologue_aes_gcm_enc_update_vaes_avx2_15:
$L$SEH_endprologue_aes_gcm_enc_update_vaes_avx2_16:
@@ -954,16 +954,16 @@
vmovdqu XMMWORD[r12],xmm1
vzeroupper
- movdqa xmm6,XMMWORD[rsp]
- movdqa xmm7,XMMWORD[16+rsp]
- movdqa xmm8,XMMWORD[32+rsp]
- movdqa xmm9,XMMWORD[48+rsp]
- movdqa xmm10,XMMWORD[64+rsp]
- movdqa xmm11,XMMWORD[80+rsp]
- movdqa xmm12,XMMWORD[96+rsp]
- movdqa xmm13,XMMWORD[112+rsp]
- movdqa xmm14,XMMWORD[128+rsp]
- movdqa xmm15,XMMWORD[144+rsp]
+ vmovdqa xmm6,XMMWORD[rsp]
+ vmovdqa xmm7,XMMWORD[16+rsp]
+ vmovdqa xmm8,XMMWORD[32+rsp]
+ vmovdqa xmm9,XMMWORD[48+rsp]
+ vmovdqa xmm10,XMMWORD[64+rsp]
+ vmovdqa xmm11,XMMWORD[80+rsp]
+ vmovdqa xmm12,XMMWORD[96+rsp]
+ vmovdqa xmm13,XMMWORD[112+rsp]
+ vmovdqa xmm14,XMMWORD[128+rsp]
+ vmovdqa xmm15,XMMWORD[144+rsp]
add rsp,160
pop r12
pop rdi
@@ -991,25 +991,25 @@
mov r12,QWORD[80+rsp]
sub rsp,160
$L$SEH_prologue_aes_gcm_dec_update_vaes_avx2_5:
- movdqa XMMWORD[rsp],xmm6
+ vmovdqa XMMWORD[rsp],xmm6
$L$SEH_prologue_aes_gcm_dec_update_vaes_avx2_6:
- movdqa XMMWORD[16+rsp],xmm7
+ vmovdqa XMMWORD[16+rsp],xmm7
$L$SEH_prologue_aes_gcm_dec_update_vaes_avx2_7:
- movdqa XMMWORD[32+rsp],xmm8
+ vmovdqa XMMWORD[32+rsp],xmm8
$L$SEH_prologue_aes_gcm_dec_update_vaes_avx2_8:
- movdqa XMMWORD[48+rsp],xmm9
+ vmovdqa XMMWORD[48+rsp],xmm9
$L$SEH_prologue_aes_gcm_dec_update_vaes_avx2_9:
- movdqa XMMWORD[64+rsp],xmm10
+ vmovdqa XMMWORD[64+rsp],xmm10
$L$SEH_prologue_aes_gcm_dec_update_vaes_avx2_10:
- movdqa XMMWORD[80+rsp],xmm11
+ vmovdqa XMMWORD[80+rsp],xmm11
$L$SEH_prologue_aes_gcm_dec_update_vaes_avx2_11:
- movdqa XMMWORD[96+rsp],xmm12
+ vmovdqa XMMWORD[96+rsp],xmm12
$L$SEH_prologue_aes_gcm_dec_update_vaes_avx2_12:
- movdqa XMMWORD[112+rsp],xmm13
+ vmovdqa XMMWORD[112+rsp],xmm13
$L$SEH_prologue_aes_gcm_dec_update_vaes_avx2_13:
- movdqa XMMWORD[128+rsp],xmm14
+ vmovdqa XMMWORD[128+rsp],xmm14
$L$SEH_prologue_aes_gcm_dec_update_vaes_avx2_14:
- movdqa XMMWORD[144+rsp],xmm15
+ vmovdqa XMMWORD[144+rsp],xmm15
$L$SEH_prologue_aes_gcm_dec_update_vaes_avx2_15:
$L$SEH_endprologue_aes_gcm_dec_update_vaes_avx2_16:
@@ -1415,16 +1415,16 @@
vmovdqu XMMWORD[r12],xmm1
vzeroupper
- movdqa xmm6,XMMWORD[rsp]
- movdqa xmm7,XMMWORD[16+rsp]
- movdqa xmm8,XMMWORD[32+rsp]
- movdqa xmm9,XMMWORD[48+rsp]
- movdqa xmm10,XMMWORD[64+rsp]
- movdqa xmm11,XMMWORD[80+rsp]
- movdqa xmm12,XMMWORD[96+rsp]
- movdqa xmm13,XMMWORD[112+rsp]
- movdqa xmm14,XMMWORD[128+rsp]
- movdqa xmm15,XMMWORD[144+rsp]
+ vmovdqa xmm6,XMMWORD[rsp]
+ vmovdqa xmm7,XMMWORD[16+rsp]
+ vmovdqa xmm8,XMMWORD[32+rsp]
+ vmovdqa xmm9,XMMWORD[48+rsp]
+ vmovdqa xmm10,XMMWORD[64+rsp]
+ vmovdqa xmm11,XMMWORD[80+rsp]
+ vmovdqa xmm12,XMMWORD[96+rsp]
+ vmovdqa xmm13,XMMWORD[112+rsp]
+ vmovdqa xmm14,XMMWORD[128+rsp]
+ vmovdqa xmm15,XMMWORD[144+rsp]
add rsp,160
pop r12
pop rdi