Use vmovdqa to save/restore xmm registers in AES-GCM code on Windows

The Windows xmm register save/restore code generated by
aes-gcm-avx2-x86_64.pl and aes-gcm-avx10-x86_64.pl used movdqa, which is
a legacy SSE instruction. This was functionally correct, but it was the
only use of legacy SSE instructions in these files. Since these files
contain AVX code, use the VEX-encoded forms of these instructions
instead. They are no longer than the legacy forms (in fact they're one
byte shorter for xmm8 and higher), and they have the added bonus that
their performance does not depend on whether other code has executed
vzeroupper.

Change-Id: Ib41ae1097d30d88dfcd4c68c0e850104034a5646
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/77228
Reviewed-by: David Benjamin <davidben@google.com>
Commit-Queue: David Benjamin <davidben@google.com>
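For concreteness, a sketch of the encodings in question (instruction
bytes derived from the Intel SDM encoding rules for these operand
forms; the operands mirror the ones used in this patch). The VEX form
matches the legacy form in length for xmm0-xmm7, and replaces the REX
prefix for xmm8 and higher, saving a byte:

    movdqa  XMMWORD[rsp],xmm6      ; 66 0F 7F 34 24        (5 bytes, legacy SSE)
    vmovdqa XMMWORD[rsp],xmm6      ; C5 F9 7F 34 24        (5 bytes, VEX)
    movdqa  XMMWORD[32+rsp],xmm8   ; 66 44 0F 7F 44 24 20  (7 bytes, REX + SSE)
    vmovdqa XMMWORD[32+rsp],xmm8   ; C5 79 7F 44 24 20     (6 bytes, VEX)

The vzeroupper point follows from the SSE/AVX transition rules: a
legacy SSE instruction preserves the upper bits of the YMM/ZMM
registers, so executing one while those bits are dirty incurs a
transition or false-dependency penalty on many microarchitectures. The
VEX-encoded 128-bit form zeroes the upper bits and is unaffected.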
diff --git a/crypto/fipsmodule/aes/asm/aes-gcm-avx10-x86_64.pl b/crypto/fipsmodule/aes/asm/aes-gcm-avx10-x86_64.pl
index 2f35323..6dc3da0 100644
--- a/crypto/fipsmodule/aes/asm/aes-gcm-avx10-x86_64.pl
+++ b/crypto/fipsmodule/aes/asm/aes-gcm-avx10-x86_64.pl
@@ -159,7 +159,7 @@
     for my $i ( 0 .. $num_xmmregs - 1 ) {
         my $reg_num = $xmmregs[$i];
         my $pos = 16 * $i;
-        $code .= "movdqa %xmm$reg_num, $pos(%rsp)\n";
+        $code .= "vmovdqa %xmm$reg_num, $pos(%rsp)\n";
         $code .= ".seh_savexmm %xmm$reg_num, $pos\n";
     }
 }
@@ -177,7 +177,7 @@
     for my $i ( 0 .. $num_xmmregs - 1 ) {
         my $reg_num = $g_cur_func_saved_xmmregs[$i];
         my $pos = 16 * $i;
-        $code .= "movdqa $pos(%rsp), %xmm$reg_num\n";
+        $code .= "vmovdqa $pos(%rsp), %xmm$reg_num\n";
     }
     $code .= "add \$$alloc_size, %rsp\n";
 }
diff --git a/crypto/fipsmodule/aes/asm/aes-gcm-avx2-x86_64.pl b/crypto/fipsmodule/aes/asm/aes-gcm-avx2-x86_64.pl
index deec309..3a1832c 100644
--- a/crypto/fipsmodule/aes/asm/aes-gcm-avx2-x86_64.pl
+++ b/crypto/fipsmodule/aes/asm/aes-gcm-avx2-x86_64.pl
@@ -121,7 +121,7 @@
     for my $i ( 0 .. $num_xmmregs - 1 ) {
         my $reg_num = $xmmregs[$i];
         my $pos = 16 * $i;
-        $code .= "movdqa %xmm$reg_num, $pos(%rsp)\n";
+        $code .= "vmovdqa %xmm$reg_num, $pos(%rsp)\n";
         $code .= ".seh_savexmm %xmm$reg_num, $pos\n";
     }
 }
@@ -139,7 +139,7 @@
     for my $i ( 0 .. $num_xmmregs - 1 ) {
         my $reg_num = $g_cur_func_saved_xmmregs[$i];
         my $pos = 16 * $i;
-        $code .= "movdqa $pos(%rsp), %xmm$reg_num\n";
+        $code .= "vmovdqa $pos(%rsp), %xmm$reg_num\n";
     }
     $code .= "add \$$alloc_size, %rsp\n";
 }
diff --git a/gen/bcm/aes-gcm-avx10-x86_64-win.asm b/gen/bcm/aes-gcm-avx10-x86_64-win.asm
index f2073e7..8beb10e 100644
--- a/gen/bcm/aes-gcm-avx10-x86_64-win.asm
+++ b/gen/bcm/aes-gcm-avx10-x86_64-win.asm
@@ -56,7 +56,7 @@
 _CET_ENDBR
         sub rsp,24
 $L$SEH_prologue_gcm_gmult_vpclmulqdq_avx10_2:
-        movdqa XMMWORD[rsp],xmm6
+        vmovdqa XMMWORD[rsp],xmm6
 $L$SEH_prologue_gcm_gmult_vpclmulqdq_avx10_3:

 $L$SEH_endprologue_gcm_gmult_vpclmulqdq_avx10_4:
@@ -82,7 +82,7 @@
         vpshufb xmm0,xmm0,xmm1
         vmovdqu XMMWORD[rcx],xmm0

-        movdqa xmm6,XMMWORD[rsp]
+        vmovdqa xmm6,XMMWORD[rsp]
         add rsp,24
         ret
 $L$SEH_end_gcm_gmult_vpclmulqdq_avx10_5:
@@ -203,21 +203,21 @@
 _CET_ENDBR
         sub rsp,136
 $L$SEH_prologue_gcm_ghash_vpclmulqdq_avx10_512_2:
-        movdqa XMMWORD[rsp],xmm6
+        vmovdqa XMMWORD[rsp],xmm6
 $L$SEH_prologue_gcm_ghash_vpclmulqdq_avx10_512_3:
-        movdqa XMMWORD[16+rsp],xmm7
+        vmovdqa XMMWORD[16+rsp],xmm7
 $L$SEH_prologue_gcm_ghash_vpclmulqdq_avx10_512_4:
-        movdqa XMMWORD[32+rsp],xmm8
+        vmovdqa XMMWORD[32+rsp],xmm8
 $L$SEH_prologue_gcm_ghash_vpclmulqdq_avx10_512_5:
-        movdqa XMMWORD[48+rsp],xmm9
+        vmovdqa XMMWORD[48+rsp],xmm9
 $L$SEH_prologue_gcm_ghash_vpclmulqdq_avx10_512_6:
-        movdqa XMMWORD[64+rsp],xmm10
+        vmovdqa XMMWORD[64+rsp],xmm10
 $L$SEH_prologue_gcm_ghash_vpclmulqdq_avx10_512_7:
-        movdqa XMMWORD[80+rsp],xmm11
+        vmovdqa XMMWORD[80+rsp],xmm11
 $L$SEH_prologue_gcm_ghash_vpclmulqdq_avx10_512_8:
-        movdqa XMMWORD[96+rsp],xmm12
+        vmovdqa XMMWORD[96+rsp],xmm12
 $L$SEH_prologue_gcm_ghash_vpclmulqdq_avx10_512_9:
-        movdqa XMMWORD[112+rsp],xmm13
+        vmovdqa XMMWORD[112+rsp],xmm13
 $L$SEH_prologue_gcm_ghash_vpclmulqdq_avx10_512_10:

 $L$SEH_endprologue_gcm_ghash_vpclmulqdq_avx10_512_11:
@@ -367,14 +367,14 @@
         vmovdqu XMMWORD[rcx],xmm5

         vzeroupper
-        movdqa xmm6,XMMWORD[rsp]
-        movdqa xmm7,XMMWORD[16+rsp]
-        movdqa xmm8,XMMWORD[32+rsp]
-        movdqa xmm9,XMMWORD[48+rsp]
-        movdqa xmm10,XMMWORD[64+rsp]
-        movdqa xmm11,XMMWORD[80+rsp]
-        movdqa xmm12,XMMWORD[96+rsp]
-        movdqa xmm13,XMMWORD[112+rsp]
+        vmovdqa xmm6,XMMWORD[rsp]
+        vmovdqa xmm7,XMMWORD[16+rsp]
+        vmovdqa xmm8,XMMWORD[32+rsp]
+        vmovdqa xmm9,XMMWORD[48+rsp]
+        vmovdqa xmm10,XMMWORD[64+rsp]
+        vmovdqa xmm11,XMMWORD[80+rsp]
+        vmovdqa xmm12,XMMWORD[96+rsp]
+        vmovdqa xmm13,XMMWORD[112+rsp]
         add rsp,136
         ret
 $L$SEH_end_gcm_ghash_vpclmulqdq_avx10_512_12:
@@ -399,25 +399,25 @@
         mov r12,QWORD[80+rsp]
         sub rsp,160
 $L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_512_5:
-        movdqa XMMWORD[rsp],xmm6
+        vmovdqa XMMWORD[rsp],xmm6
 $L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_512_6:
-        movdqa XMMWORD[16+rsp],xmm7
+        vmovdqa XMMWORD[16+rsp],xmm7
 $L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_512_7:
-        movdqa XMMWORD[32+rsp],xmm8
+        vmovdqa XMMWORD[32+rsp],xmm8
 $L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_512_8:
-        movdqa XMMWORD[48+rsp],xmm9
+        vmovdqa XMMWORD[48+rsp],xmm9
 $L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_512_9:
-        movdqa XMMWORD[64+rsp],xmm10
+        vmovdqa XMMWORD[64+rsp],xmm10
 $L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_512_10:
-        movdqa XMMWORD[80+rsp],xmm11
+        vmovdqa XMMWORD[80+rsp],xmm11
 $L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_512_11:
-        movdqa XMMWORD[96+rsp],xmm12
+        vmovdqa XMMWORD[96+rsp],xmm12
 $L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_512_12:
-        movdqa XMMWORD[112+rsp],xmm13
+        vmovdqa XMMWORD[112+rsp],xmm13
 $L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_512_13:
-        movdqa XMMWORD[128+rsp],xmm14
+        vmovdqa XMMWORD[128+rsp],xmm14
 $L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_512_14:
-        movdqa XMMWORD[144+rsp],xmm15
+        vmovdqa XMMWORD[144+rsp],xmm15
 $L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_512_15:

 $L$SEH_endprologue_aes_gcm_enc_update_vaes_avx10_512_16:
@@ -894,16 +894,16 @@
         vmovdqu XMMWORD[r12],xmm10

         vzeroupper
-        movdqa xmm6,XMMWORD[rsp]
-        movdqa xmm7,XMMWORD[16+rsp]
-        movdqa xmm8,XMMWORD[32+rsp]
-        movdqa xmm9,XMMWORD[48+rsp]
-        movdqa xmm10,XMMWORD[64+rsp]
-        movdqa xmm11,XMMWORD[80+rsp]
-        movdqa xmm12,XMMWORD[96+rsp]
-        movdqa xmm13,XMMWORD[112+rsp]
-        movdqa xmm14,XMMWORD[128+rsp]
-        movdqa xmm15,XMMWORD[144+rsp]
+        vmovdqa xmm6,XMMWORD[rsp]
+        vmovdqa xmm7,XMMWORD[16+rsp]
+        vmovdqa xmm8,XMMWORD[32+rsp]
+        vmovdqa xmm9,XMMWORD[48+rsp]
+        vmovdqa xmm10,XMMWORD[64+rsp]
+        vmovdqa xmm11,XMMWORD[80+rsp]
+        vmovdqa xmm12,XMMWORD[96+rsp]
+        vmovdqa xmm13,XMMWORD[112+rsp]
+        vmovdqa xmm14,XMMWORD[128+rsp]
+        vmovdqa xmm15,XMMWORD[144+rsp]
         add rsp,160
         pop r12
         pop rdi
@@ -931,25 +931,25 @@
         mov r12,QWORD[80+rsp]
         sub rsp,160
 $L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_512_5:
-        movdqa XMMWORD[rsp],xmm6
+        vmovdqa XMMWORD[rsp],xmm6
 $L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_512_6:
-        movdqa XMMWORD[16+rsp],xmm7
+        vmovdqa XMMWORD[16+rsp],xmm7
 $L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_512_7:
-        movdqa XMMWORD[32+rsp],xmm8
+        vmovdqa XMMWORD[32+rsp],xmm8
 $L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_512_8:
-        movdqa XMMWORD[48+rsp],xmm9
+        vmovdqa XMMWORD[48+rsp],xmm9
 $L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_512_9:
-        movdqa XMMWORD[64+rsp],xmm10
+        vmovdqa XMMWORD[64+rsp],xmm10
 $L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_512_10:
-        movdqa XMMWORD[80+rsp],xmm11
+        vmovdqa XMMWORD[80+rsp],xmm11
 $L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_512_11:
-        movdqa XMMWORD[96+rsp],xmm12
+        vmovdqa XMMWORD[96+rsp],xmm12
 $L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_512_12:
-        movdqa XMMWORD[112+rsp],xmm13
+        vmovdqa XMMWORD[112+rsp],xmm13
 $L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_512_13:
-        movdqa XMMWORD[128+rsp],xmm14
+        vmovdqa XMMWORD[128+rsp],xmm14
 $L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_512_14:
-        movdqa XMMWORD[144+rsp],xmm15
+        vmovdqa XMMWORD[144+rsp],xmm15
 $L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_512_15:

 $L$SEH_endprologue_aes_gcm_dec_update_vaes_avx10_512_16:
@@ -1336,16 +1336,16 @@
         vmovdqu XMMWORD[r12],xmm10

         vzeroupper
-        movdqa xmm6,XMMWORD[rsp]
-        movdqa xmm7,XMMWORD[16+rsp]
-        movdqa xmm8,XMMWORD[32+rsp]
-        movdqa xmm9,XMMWORD[48+rsp]
-        movdqa xmm10,XMMWORD[64+rsp]
-        movdqa xmm11,XMMWORD[80+rsp]
-        movdqa xmm12,XMMWORD[96+rsp]
-        movdqa xmm13,XMMWORD[112+rsp]
-        movdqa xmm14,XMMWORD[128+rsp]
-        movdqa xmm15,XMMWORD[144+rsp]
+        vmovdqa xmm6,XMMWORD[rsp]
+        vmovdqa xmm7,XMMWORD[16+rsp]
+        vmovdqa xmm8,XMMWORD[32+rsp]
+        vmovdqa xmm9,XMMWORD[48+rsp]
+        vmovdqa xmm10,XMMWORD[64+rsp]
+        vmovdqa xmm11,XMMWORD[80+rsp]
+        vmovdqa xmm12,XMMWORD[96+rsp]
+        vmovdqa xmm13,XMMWORD[112+rsp]
+        vmovdqa xmm14,XMMWORD[128+rsp]
+        vmovdqa xmm15,XMMWORD[144+rsp]
         add rsp,160
         pop r12
         pop rdi
diff --git a/gen/bcm/aes-gcm-avx2-x86_64-win.asm b/gen/bcm/aes-gcm-avx2-x86_64-win.asm
index aec14b3..638348e 100644
--- a/gen/bcm/aes-gcm-avx2-x86_64-win.asm
+++ b/gen/bcm/aes-gcm-avx2-x86_64-win.asm
@@ -52,7 +52,7 @@
 _CET_ENDBR
         sub rsp,24
 $L$SEH_prologue_gcm_init_vpclmulqdq_avx2_2:
-        movdqa XMMWORD[rsp],xmm6
+        vmovdqa XMMWORD[rsp],xmm6
 $L$SEH_prologue_gcm_init_vpclmulqdq_avx2_3:

 $L$SEH_endprologue_gcm_init_vpclmulqdq_avx2_4:
@@ -160,7 +160,7 @@
         vmovdqu YMMWORD[128+rcx],ymm0

         vzeroupper
-        movdqa xmm6,XMMWORD[rsp]
+        vmovdqa xmm6,XMMWORD[rsp]
         add rsp,24
         ret
 $L$SEH_end_gcm_init_vpclmulqdq_avx2_5:
@@ -175,7 +175,7 @@
 _CET_ENDBR
         sub rsp,24
 $L$SEH_prologue_gcm_gmult_vpclmulqdq_avx2_2:
-        movdqa XMMWORD[rsp],xmm6
+        vmovdqa XMMWORD[rsp],xmm6
 $L$SEH_prologue_gcm_gmult_vpclmulqdq_avx2_3:

 $L$SEH_endprologue_gcm_gmult_vpclmulqdq_avx2_4:
@@ -203,7 +203,7 @@
         vpshufb xmm0,xmm0,xmm1
         vmovdqu XMMWORD[rcx],xmm0

-        movdqa xmm6,XMMWORD[rsp]
+        vmovdqa xmm6,XMMWORD[rsp]
         add rsp,24
         ret
 $L$SEH_end_gcm_gmult_vpclmulqdq_avx2_5:
@@ -218,13 +218,13 @@
 _CET_ENDBR
         sub rsp,72
 $L$SEH_prologue_gcm_ghash_vpclmulqdq_avx2_2:
-        movdqa XMMWORD[rsp],xmm6
+        vmovdqa XMMWORD[rsp],xmm6
 $L$SEH_prologue_gcm_ghash_vpclmulqdq_avx2_3:
-        movdqa XMMWORD[16+rsp],xmm7
+        vmovdqa XMMWORD[16+rsp],xmm7
 $L$SEH_prologue_gcm_ghash_vpclmulqdq_avx2_4:
-        movdqa XMMWORD[32+rsp],xmm8
+        vmovdqa XMMWORD[32+rsp],xmm8
 $L$SEH_prologue_gcm_ghash_vpclmulqdq_avx2_5:
-        movdqa XMMWORD[48+rsp],xmm9
+        vmovdqa XMMWORD[48+rsp],xmm9
 $L$SEH_prologue_gcm_ghash_vpclmulqdq_avx2_6:

 $L$SEH_endprologue_gcm_ghash_vpclmulqdq_avx2_7:
@@ -384,10 +384,10 @@
         vmovdqu XMMWORD[rcx],xmm5

         vzeroupper
-        movdqa xmm6,XMMWORD[rsp]
-        movdqa xmm7,XMMWORD[16+rsp]
-        movdqa xmm8,XMMWORD[32+rsp]
-        movdqa xmm9,XMMWORD[48+rsp]
+        vmovdqa xmm6,XMMWORD[rsp]
+        vmovdqa xmm7,XMMWORD[16+rsp]
+        vmovdqa xmm8,XMMWORD[32+rsp]
+        vmovdqa xmm9,XMMWORD[48+rsp]
         add rsp,72
         ret
 $L$SEH_end_gcm_ghash_vpclmulqdq_avx2_8:
@@ -412,25 +412,25 @@
         mov r12,QWORD[80+rsp]
         sub rsp,160
 $L$SEH_prologue_aes_gcm_enc_update_vaes_avx2_5:
-        movdqa XMMWORD[rsp],xmm6
+        vmovdqa XMMWORD[rsp],xmm6
 $L$SEH_prologue_aes_gcm_enc_update_vaes_avx2_6:
-        movdqa XMMWORD[16+rsp],xmm7
+        vmovdqa XMMWORD[16+rsp],xmm7
 $L$SEH_prologue_aes_gcm_enc_update_vaes_avx2_7:
-        movdqa XMMWORD[32+rsp],xmm8
+        vmovdqa XMMWORD[32+rsp],xmm8
 $L$SEH_prologue_aes_gcm_enc_update_vaes_avx2_8:
-        movdqa XMMWORD[48+rsp],xmm9
+        vmovdqa XMMWORD[48+rsp],xmm9
 $L$SEH_prologue_aes_gcm_enc_update_vaes_avx2_9:
-        movdqa XMMWORD[64+rsp],xmm10
+        vmovdqa XMMWORD[64+rsp],xmm10
 $L$SEH_prologue_aes_gcm_enc_update_vaes_avx2_10:
-        movdqa XMMWORD[80+rsp],xmm11
+        vmovdqa XMMWORD[80+rsp],xmm11
 $L$SEH_prologue_aes_gcm_enc_update_vaes_avx2_11:
-        movdqa XMMWORD[96+rsp],xmm12
+        vmovdqa XMMWORD[96+rsp],xmm12
 $L$SEH_prologue_aes_gcm_enc_update_vaes_avx2_12:
-        movdqa XMMWORD[112+rsp],xmm13
+        vmovdqa XMMWORD[112+rsp],xmm13
 $L$SEH_prologue_aes_gcm_enc_update_vaes_avx2_13:
-        movdqa XMMWORD[128+rsp],xmm14
+        vmovdqa XMMWORD[128+rsp],xmm14
 $L$SEH_prologue_aes_gcm_enc_update_vaes_avx2_14:
-        movdqa XMMWORD[144+rsp],xmm15
+        vmovdqa XMMWORD[144+rsp],xmm15
 $L$SEH_prologue_aes_gcm_enc_update_vaes_avx2_15:

 $L$SEH_endprologue_aes_gcm_enc_update_vaes_avx2_16:
@@ -954,16 +954,16 @@
         vmovdqu XMMWORD[r12],xmm1

         vzeroupper
-        movdqa xmm6,XMMWORD[rsp]
-        movdqa xmm7,XMMWORD[16+rsp]
-        movdqa xmm8,XMMWORD[32+rsp]
-        movdqa xmm9,XMMWORD[48+rsp]
-        movdqa xmm10,XMMWORD[64+rsp]
-        movdqa xmm11,XMMWORD[80+rsp]
-        movdqa xmm12,XMMWORD[96+rsp]
-        movdqa xmm13,XMMWORD[112+rsp]
-        movdqa xmm14,XMMWORD[128+rsp]
-        movdqa xmm15,XMMWORD[144+rsp]
+        vmovdqa xmm6,XMMWORD[rsp]
+        vmovdqa xmm7,XMMWORD[16+rsp]
+        vmovdqa xmm8,XMMWORD[32+rsp]
+        vmovdqa xmm9,XMMWORD[48+rsp]
+        vmovdqa xmm10,XMMWORD[64+rsp]
+        vmovdqa xmm11,XMMWORD[80+rsp]
+        vmovdqa xmm12,XMMWORD[96+rsp]
+        vmovdqa xmm13,XMMWORD[112+rsp]
+        vmovdqa xmm14,XMMWORD[128+rsp]
+        vmovdqa xmm15,XMMWORD[144+rsp]
         add rsp,160
         pop r12
         pop rdi
@@ -991,25 +991,25 @@
         mov r12,QWORD[80+rsp]
         sub rsp,160
 $L$SEH_prologue_aes_gcm_dec_update_vaes_avx2_5:
-        movdqa XMMWORD[rsp],xmm6
+        vmovdqa XMMWORD[rsp],xmm6
 $L$SEH_prologue_aes_gcm_dec_update_vaes_avx2_6:
-        movdqa XMMWORD[16+rsp],xmm7
+        vmovdqa XMMWORD[16+rsp],xmm7
 $L$SEH_prologue_aes_gcm_dec_update_vaes_avx2_7:
-        movdqa XMMWORD[32+rsp],xmm8
+        vmovdqa XMMWORD[32+rsp],xmm8
 $L$SEH_prologue_aes_gcm_dec_update_vaes_avx2_8:
-        movdqa XMMWORD[48+rsp],xmm9
+        vmovdqa XMMWORD[48+rsp],xmm9
 $L$SEH_prologue_aes_gcm_dec_update_vaes_avx2_9:
-        movdqa XMMWORD[64+rsp],xmm10
+        vmovdqa XMMWORD[64+rsp],xmm10
 $L$SEH_prologue_aes_gcm_dec_update_vaes_avx2_10:
-        movdqa XMMWORD[80+rsp],xmm11
+        vmovdqa XMMWORD[80+rsp],xmm11
 $L$SEH_prologue_aes_gcm_dec_update_vaes_avx2_11:
-        movdqa XMMWORD[96+rsp],xmm12
+        vmovdqa XMMWORD[96+rsp],xmm12
 $L$SEH_prologue_aes_gcm_dec_update_vaes_avx2_12:
-        movdqa XMMWORD[112+rsp],xmm13
+        vmovdqa XMMWORD[112+rsp],xmm13
 $L$SEH_prologue_aes_gcm_dec_update_vaes_avx2_13:
-        movdqa XMMWORD[128+rsp],xmm14
+        vmovdqa XMMWORD[128+rsp],xmm14
 $L$SEH_prologue_aes_gcm_dec_update_vaes_avx2_14:
-        movdqa XMMWORD[144+rsp],xmm15
+        vmovdqa XMMWORD[144+rsp],xmm15
 $L$SEH_prologue_aes_gcm_dec_update_vaes_avx2_15:

 $L$SEH_endprologue_aes_gcm_dec_update_vaes_avx2_16:
@@ -1415,16 +1415,16 @@
         vmovdqu XMMWORD[r12],xmm1

         vzeroupper
-        movdqa xmm6,XMMWORD[rsp]
-        movdqa xmm7,XMMWORD[16+rsp]
-        movdqa xmm8,XMMWORD[32+rsp]
-        movdqa xmm9,XMMWORD[48+rsp]
-        movdqa xmm10,XMMWORD[64+rsp]
-        movdqa xmm11,XMMWORD[80+rsp]
-        movdqa xmm12,XMMWORD[96+rsp]
-        movdqa xmm13,XMMWORD[112+rsp]
-        movdqa xmm14,XMMWORD[128+rsp]
-        movdqa xmm15,XMMWORD[144+rsp]
+        vmovdqa xmm6,XMMWORD[rsp]
+        vmovdqa xmm7,XMMWORD[16+rsp]
+        vmovdqa xmm8,XMMWORD[32+rsp]
+        vmovdqa xmm9,XMMWORD[48+rsp]
+        vmovdqa xmm10,XMMWORD[64+rsp]
+        vmovdqa xmm11,XMMWORD[80+rsp]
+        vmovdqa xmm12,XMMWORD[96+rsp]
+        vmovdqa xmm13,XMMWORD[112+rsp]
+        vmovdqa xmm14,XMMWORD[128+rsp]
+        vmovdqa xmm15,XMMWORD[144+rsp]
         add rsp,160
         pop r12
         pop rdi