Use vmovdqa to save/restore xmm registers in AES-GCM code on Windows

The Windows xmm register save/restore code generated by
aes-gcm-avx2-x86_64.pl and aes-gcm-avx10-x86_64.pl used movdqa, a
legacy SSE instruction.  This was functionally correct, but it was the
only use of legacy SSE instructions in these files.  Since these files
otherwise contain only AVX code, use the VEX-encoded form, vmovdqa,
instead.  It is never longer (in fact it is one byte shorter for xmm8
and higher), and it has the added benefit that its performance does not
depend on whether other code has executed vzeroupper, as VEX-encoded
instructions are not subject to SSE/AVX transition penalties.
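
For example, the save of a callee-saved xmm register to the stack
encodes as follows (encodings shown for illustration only; these are
the standard forms from the instruction reference, not bytes taken
from the generated output):

    movdqa  %xmm6, (%rsp)    # 66 0F 7F 34 24       (5 bytes)
    movdqa  %xmm8, (%rsp)    # 66 44 0F 7F 04 24    (6 bytes, REX prefix)
    vmovdqa %xmm6, (%rsp)    # C5 F9 7F 34 24       (5 bytes)
    vmovdqa %xmm8, (%rsp)    # C5 79 7F 04 24       (5 bytes, 2-byte VEX)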

Change-Id: Ib41ae1097d30d88dfcd4c68c0e850104034a5646
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/77228
Reviewed-by: David Benjamin <davidben@google.com>
Commit-Queue: David Benjamin <davidben@google.com>
diff --git a/crypto/fipsmodule/aes/asm/aes-gcm-avx10-x86_64.pl b/crypto/fipsmodule/aes/asm/aes-gcm-avx10-x86_64.pl
index 2f35323..6dc3da0 100644
--- a/crypto/fipsmodule/aes/asm/aes-gcm-avx10-x86_64.pl
+++ b/crypto/fipsmodule/aes/asm/aes-gcm-avx10-x86_64.pl
@@ -159,7 +159,7 @@
         for my $i ( 0 .. $num_xmmregs - 1 ) {
             my $reg_num = $xmmregs[$i];
             my $pos     = 16 * $i;
-            $code .= "movdqa %xmm$reg_num, $pos(%rsp)\n";
+            $code .= "vmovdqa %xmm$reg_num, $pos(%rsp)\n";
             $code .= ".seh_savexmm %xmm$reg_num, $pos\n";
         }
     }
@@ -177,7 +177,7 @@
         for my $i ( 0 .. $num_xmmregs - 1 ) {
             my $reg_num = $g_cur_func_saved_xmmregs[$i];
             my $pos     = 16 * $i;
-            $code .= "movdqa $pos(%rsp), %xmm$reg_num\n";
+            $code .= "vmovdqa $pos(%rsp), %xmm$reg_num\n";
         }
         $code .= "add \$$alloc_size, %rsp\n";
     }
diff --git a/crypto/fipsmodule/aes/asm/aes-gcm-avx2-x86_64.pl b/crypto/fipsmodule/aes/asm/aes-gcm-avx2-x86_64.pl
index deec309..3a1832c 100644
--- a/crypto/fipsmodule/aes/asm/aes-gcm-avx2-x86_64.pl
+++ b/crypto/fipsmodule/aes/asm/aes-gcm-avx2-x86_64.pl
@@ -121,7 +121,7 @@
         for my $i ( 0 .. $num_xmmregs - 1 ) {
             my $reg_num = $xmmregs[$i];
             my $pos     = 16 * $i;
-            $code .= "movdqa %xmm$reg_num, $pos(%rsp)\n";
+            $code .= "vmovdqa %xmm$reg_num, $pos(%rsp)\n";
             $code .= ".seh_savexmm %xmm$reg_num, $pos\n";
         }
     }
@@ -139,7 +139,7 @@
         for my $i ( 0 .. $num_xmmregs - 1 ) {
             my $reg_num = $g_cur_func_saved_xmmregs[$i];
             my $pos     = 16 * $i;
-            $code .= "movdqa $pos(%rsp), %xmm$reg_num\n";
+            $code .= "vmovdqa $pos(%rsp), %xmm$reg_num\n";
         }
         $code .= "add \$$alloc_size, %rsp\n";
     }
diff --git a/gen/bcm/aes-gcm-avx10-x86_64-win.asm b/gen/bcm/aes-gcm-avx10-x86_64-win.asm
index f2073e7..8beb10e 100644
--- a/gen/bcm/aes-gcm-avx10-x86_64-win.asm
+++ b/gen/bcm/aes-gcm-avx10-x86_64-win.asm
@@ -56,7 +56,7 @@
 _CET_ENDBR
 	sub	rsp,24
 $L$SEH_prologue_gcm_gmult_vpclmulqdq_avx10_2:
-	movdqa	XMMWORD[rsp],xmm6
+	vmovdqa	XMMWORD[rsp],xmm6
 $L$SEH_prologue_gcm_gmult_vpclmulqdq_avx10_3:
 
 $L$SEH_endprologue_gcm_gmult_vpclmulqdq_avx10_4:
@@ -82,7 +82,7 @@
 
 	vpshufb	xmm0,xmm0,xmm1
 	vmovdqu	XMMWORD[rcx],xmm0
-	movdqa	xmm6,XMMWORD[rsp]
+	vmovdqa	xmm6,XMMWORD[rsp]
 	add	rsp,24
 	ret
 $L$SEH_end_gcm_gmult_vpclmulqdq_avx10_5:
@@ -203,21 +203,21 @@
 _CET_ENDBR
 	sub	rsp,136
 $L$SEH_prologue_gcm_ghash_vpclmulqdq_avx10_512_2:
-	movdqa	XMMWORD[rsp],xmm6
+	vmovdqa	XMMWORD[rsp],xmm6
 $L$SEH_prologue_gcm_ghash_vpclmulqdq_avx10_512_3:
-	movdqa	XMMWORD[16+rsp],xmm7
+	vmovdqa	XMMWORD[16+rsp],xmm7
 $L$SEH_prologue_gcm_ghash_vpclmulqdq_avx10_512_4:
-	movdqa	XMMWORD[32+rsp],xmm8
+	vmovdqa	XMMWORD[32+rsp],xmm8
 $L$SEH_prologue_gcm_ghash_vpclmulqdq_avx10_512_5:
-	movdqa	XMMWORD[48+rsp],xmm9
+	vmovdqa	XMMWORD[48+rsp],xmm9
 $L$SEH_prologue_gcm_ghash_vpclmulqdq_avx10_512_6:
-	movdqa	XMMWORD[64+rsp],xmm10
+	vmovdqa	XMMWORD[64+rsp],xmm10
 $L$SEH_prologue_gcm_ghash_vpclmulqdq_avx10_512_7:
-	movdqa	XMMWORD[80+rsp],xmm11
+	vmovdqa	XMMWORD[80+rsp],xmm11
 $L$SEH_prologue_gcm_ghash_vpclmulqdq_avx10_512_8:
-	movdqa	XMMWORD[96+rsp],xmm12
+	vmovdqa	XMMWORD[96+rsp],xmm12
 $L$SEH_prologue_gcm_ghash_vpclmulqdq_avx10_512_9:
-	movdqa	XMMWORD[112+rsp],xmm13
+	vmovdqa	XMMWORD[112+rsp],xmm13
 $L$SEH_prologue_gcm_ghash_vpclmulqdq_avx10_512_10:
 
 $L$SEH_endprologue_gcm_ghash_vpclmulqdq_avx10_512_11:
@@ -367,14 +367,14 @@
 	vmovdqu	XMMWORD[rcx],xmm5
 
 	vzeroupper
-	movdqa	xmm6,XMMWORD[rsp]
-	movdqa	xmm7,XMMWORD[16+rsp]
-	movdqa	xmm8,XMMWORD[32+rsp]
-	movdqa	xmm9,XMMWORD[48+rsp]
-	movdqa	xmm10,XMMWORD[64+rsp]
-	movdqa	xmm11,XMMWORD[80+rsp]
-	movdqa	xmm12,XMMWORD[96+rsp]
-	movdqa	xmm13,XMMWORD[112+rsp]
+	vmovdqa	xmm6,XMMWORD[rsp]
+	vmovdqa	xmm7,XMMWORD[16+rsp]
+	vmovdqa	xmm8,XMMWORD[32+rsp]
+	vmovdqa	xmm9,XMMWORD[48+rsp]
+	vmovdqa	xmm10,XMMWORD[64+rsp]
+	vmovdqa	xmm11,XMMWORD[80+rsp]
+	vmovdqa	xmm12,XMMWORD[96+rsp]
+	vmovdqa	xmm13,XMMWORD[112+rsp]
 	add	rsp,136
 	ret
 $L$SEH_end_gcm_ghash_vpclmulqdq_avx10_512_12:
@@ -399,25 +399,25 @@
 	mov	r12,QWORD[80+rsp]
 	sub	rsp,160
 $L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_512_5:
-	movdqa	XMMWORD[rsp],xmm6
+	vmovdqa	XMMWORD[rsp],xmm6
 $L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_512_6:
-	movdqa	XMMWORD[16+rsp],xmm7
+	vmovdqa	XMMWORD[16+rsp],xmm7
 $L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_512_7:
-	movdqa	XMMWORD[32+rsp],xmm8
+	vmovdqa	XMMWORD[32+rsp],xmm8
 $L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_512_8:
-	movdqa	XMMWORD[48+rsp],xmm9
+	vmovdqa	XMMWORD[48+rsp],xmm9
 $L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_512_9:
-	movdqa	XMMWORD[64+rsp],xmm10
+	vmovdqa	XMMWORD[64+rsp],xmm10
 $L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_512_10:
-	movdqa	XMMWORD[80+rsp],xmm11
+	vmovdqa	XMMWORD[80+rsp],xmm11
 $L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_512_11:
-	movdqa	XMMWORD[96+rsp],xmm12
+	vmovdqa	XMMWORD[96+rsp],xmm12
 $L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_512_12:
-	movdqa	XMMWORD[112+rsp],xmm13
+	vmovdqa	XMMWORD[112+rsp],xmm13
 $L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_512_13:
-	movdqa	XMMWORD[128+rsp],xmm14
+	vmovdqa	XMMWORD[128+rsp],xmm14
 $L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_512_14:
-	movdqa	XMMWORD[144+rsp],xmm15
+	vmovdqa	XMMWORD[144+rsp],xmm15
 $L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_512_15:
 
 $L$SEH_endprologue_aes_gcm_enc_update_vaes_avx10_512_16:
@@ -894,16 +894,16 @@
 	vmovdqu	XMMWORD[r12],xmm10
 
 	vzeroupper
-	movdqa	xmm6,XMMWORD[rsp]
-	movdqa	xmm7,XMMWORD[16+rsp]
-	movdqa	xmm8,XMMWORD[32+rsp]
-	movdqa	xmm9,XMMWORD[48+rsp]
-	movdqa	xmm10,XMMWORD[64+rsp]
-	movdqa	xmm11,XMMWORD[80+rsp]
-	movdqa	xmm12,XMMWORD[96+rsp]
-	movdqa	xmm13,XMMWORD[112+rsp]
-	movdqa	xmm14,XMMWORD[128+rsp]
-	movdqa	xmm15,XMMWORD[144+rsp]
+	vmovdqa	xmm6,XMMWORD[rsp]
+	vmovdqa	xmm7,XMMWORD[16+rsp]
+	vmovdqa	xmm8,XMMWORD[32+rsp]
+	vmovdqa	xmm9,XMMWORD[48+rsp]
+	vmovdqa	xmm10,XMMWORD[64+rsp]
+	vmovdqa	xmm11,XMMWORD[80+rsp]
+	vmovdqa	xmm12,XMMWORD[96+rsp]
+	vmovdqa	xmm13,XMMWORD[112+rsp]
+	vmovdqa	xmm14,XMMWORD[128+rsp]
+	vmovdqa	xmm15,XMMWORD[144+rsp]
 	add	rsp,160
 	pop	r12
 	pop	rdi
@@ -931,25 +931,25 @@
 	mov	r12,QWORD[80+rsp]
 	sub	rsp,160
 $L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_512_5:
-	movdqa	XMMWORD[rsp],xmm6
+	vmovdqa	XMMWORD[rsp],xmm6
 $L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_512_6:
-	movdqa	XMMWORD[16+rsp],xmm7
+	vmovdqa	XMMWORD[16+rsp],xmm7
 $L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_512_7:
-	movdqa	XMMWORD[32+rsp],xmm8
+	vmovdqa	XMMWORD[32+rsp],xmm8
 $L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_512_8:
-	movdqa	XMMWORD[48+rsp],xmm9
+	vmovdqa	XMMWORD[48+rsp],xmm9
 $L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_512_9:
-	movdqa	XMMWORD[64+rsp],xmm10
+	vmovdqa	XMMWORD[64+rsp],xmm10
 $L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_512_10:
-	movdqa	XMMWORD[80+rsp],xmm11
+	vmovdqa	XMMWORD[80+rsp],xmm11
 $L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_512_11:
-	movdqa	XMMWORD[96+rsp],xmm12
+	vmovdqa	XMMWORD[96+rsp],xmm12
 $L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_512_12:
-	movdqa	XMMWORD[112+rsp],xmm13
+	vmovdqa	XMMWORD[112+rsp],xmm13
 $L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_512_13:
-	movdqa	XMMWORD[128+rsp],xmm14
+	vmovdqa	XMMWORD[128+rsp],xmm14
 $L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_512_14:
-	movdqa	XMMWORD[144+rsp],xmm15
+	vmovdqa	XMMWORD[144+rsp],xmm15
 $L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_512_15:
 
 $L$SEH_endprologue_aes_gcm_dec_update_vaes_avx10_512_16:
@@ -1336,16 +1336,16 @@
 	vmovdqu	XMMWORD[r12],xmm10
 
 	vzeroupper
-	movdqa	xmm6,XMMWORD[rsp]
-	movdqa	xmm7,XMMWORD[16+rsp]
-	movdqa	xmm8,XMMWORD[32+rsp]
-	movdqa	xmm9,XMMWORD[48+rsp]
-	movdqa	xmm10,XMMWORD[64+rsp]
-	movdqa	xmm11,XMMWORD[80+rsp]
-	movdqa	xmm12,XMMWORD[96+rsp]
-	movdqa	xmm13,XMMWORD[112+rsp]
-	movdqa	xmm14,XMMWORD[128+rsp]
-	movdqa	xmm15,XMMWORD[144+rsp]
+	vmovdqa	xmm6,XMMWORD[rsp]
+	vmovdqa	xmm7,XMMWORD[16+rsp]
+	vmovdqa	xmm8,XMMWORD[32+rsp]
+	vmovdqa	xmm9,XMMWORD[48+rsp]
+	vmovdqa	xmm10,XMMWORD[64+rsp]
+	vmovdqa	xmm11,XMMWORD[80+rsp]
+	vmovdqa	xmm12,XMMWORD[96+rsp]
+	vmovdqa	xmm13,XMMWORD[112+rsp]
+	vmovdqa	xmm14,XMMWORD[128+rsp]
+	vmovdqa	xmm15,XMMWORD[144+rsp]
 	add	rsp,160
 	pop	r12
 	pop	rdi
diff --git a/gen/bcm/aes-gcm-avx2-x86_64-win.asm b/gen/bcm/aes-gcm-avx2-x86_64-win.asm
index aec14b3..638348e 100644
--- a/gen/bcm/aes-gcm-avx2-x86_64-win.asm
+++ b/gen/bcm/aes-gcm-avx2-x86_64-win.asm
@@ -52,7 +52,7 @@
 _CET_ENDBR
 	sub	rsp,24
 $L$SEH_prologue_gcm_init_vpclmulqdq_avx2_2:
-	movdqa	XMMWORD[rsp],xmm6
+	vmovdqa	XMMWORD[rsp],xmm6
 $L$SEH_prologue_gcm_init_vpclmulqdq_avx2_3:
 
 $L$SEH_endprologue_gcm_init_vpclmulqdq_avx2_4:
@@ -160,7 +160,7 @@
 	vmovdqu	YMMWORD[128+rcx],ymm0
 
 	vzeroupper
-	movdqa	xmm6,XMMWORD[rsp]
+	vmovdqa	xmm6,XMMWORD[rsp]
 	add	rsp,24
 	ret
 $L$SEH_end_gcm_init_vpclmulqdq_avx2_5:
@@ -175,7 +175,7 @@
 _CET_ENDBR
 	sub	rsp,24
 $L$SEH_prologue_gcm_gmult_vpclmulqdq_avx2_2:
-	movdqa	XMMWORD[rsp],xmm6
+	vmovdqa	XMMWORD[rsp],xmm6
 $L$SEH_prologue_gcm_gmult_vpclmulqdq_avx2_3:
 
 $L$SEH_endprologue_gcm_gmult_vpclmulqdq_avx2_4:
@@ -203,7 +203,7 @@
 
 	vpshufb	xmm0,xmm0,xmm1
 	vmovdqu	XMMWORD[rcx],xmm0
-	movdqa	xmm6,XMMWORD[rsp]
+	vmovdqa	xmm6,XMMWORD[rsp]
 	add	rsp,24
 	ret
 $L$SEH_end_gcm_gmult_vpclmulqdq_avx2_5:
@@ -218,13 +218,13 @@
 _CET_ENDBR
 	sub	rsp,72
 $L$SEH_prologue_gcm_ghash_vpclmulqdq_avx2_2:
-	movdqa	XMMWORD[rsp],xmm6
+	vmovdqa	XMMWORD[rsp],xmm6
 $L$SEH_prologue_gcm_ghash_vpclmulqdq_avx2_3:
-	movdqa	XMMWORD[16+rsp],xmm7
+	vmovdqa	XMMWORD[16+rsp],xmm7
 $L$SEH_prologue_gcm_ghash_vpclmulqdq_avx2_4:
-	movdqa	XMMWORD[32+rsp],xmm8
+	vmovdqa	XMMWORD[32+rsp],xmm8
 $L$SEH_prologue_gcm_ghash_vpclmulqdq_avx2_5:
-	movdqa	XMMWORD[48+rsp],xmm9
+	vmovdqa	XMMWORD[48+rsp],xmm9
 $L$SEH_prologue_gcm_ghash_vpclmulqdq_avx2_6:
 
 $L$SEH_endprologue_gcm_ghash_vpclmulqdq_avx2_7:
@@ -384,10 +384,10 @@
 	vmovdqu	XMMWORD[rcx],xmm5
 
 	vzeroupper
-	movdqa	xmm6,XMMWORD[rsp]
-	movdqa	xmm7,XMMWORD[16+rsp]
-	movdqa	xmm8,XMMWORD[32+rsp]
-	movdqa	xmm9,XMMWORD[48+rsp]
+	vmovdqa	xmm6,XMMWORD[rsp]
+	vmovdqa	xmm7,XMMWORD[16+rsp]
+	vmovdqa	xmm8,XMMWORD[32+rsp]
+	vmovdqa	xmm9,XMMWORD[48+rsp]
 	add	rsp,72
 	ret
 $L$SEH_end_gcm_ghash_vpclmulqdq_avx2_8:
@@ -412,25 +412,25 @@
 	mov	r12,QWORD[80+rsp]
 	sub	rsp,160
 $L$SEH_prologue_aes_gcm_enc_update_vaes_avx2_5:
-	movdqa	XMMWORD[rsp],xmm6
+	vmovdqa	XMMWORD[rsp],xmm6
 $L$SEH_prologue_aes_gcm_enc_update_vaes_avx2_6:
-	movdqa	XMMWORD[16+rsp],xmm7
+	vmovdqa	XMMWORD[16+rsp],xmm7
 $L$SEH_prologue_aes_gcm_enc_update_vaes_avx2_7:
-	movdqa	XMMWORD[32+rsp],xmm8
+	vmovdqa	XMMWORD[32+rsp],xmm8
 $L$SEH_prologue_aes_gcm_enc_update_vaes_avx2_8:
-	movdqa	XMMWORD[48+rsp],xmm9
+	vmovdqa	XMMWORD[48+rsp],xmm9
 $L$SEH_prologue_aes_gcm_enc_update_vaes_avx2_9:
-	movdqa	XMMWORD[64+rsp],xmm10
+	vmovdqa	XMMWORD[64+rsp],xmm10
 $L$SEH_prologue_aes_gcm_enc_update_vaes_avx2_10:
-	movdqa	XMMWORD[80+rsp],xmm11
+	vmovdqa	XMMWORD[80+rsp],xmm11
 $L$SEH_prologue_aes_gcm_enc_update_vaes_avx2_11:
-	movdqa	XMMWORD[96+rsp],xmm12
+	vmovdqa	XMMWORD[96+rsp],xmm12
 $L$SEH_prologue_aes_gcm_enc_update_vaes_avx2_12:
-	movdqa	XMMWORD[112+rsp],xmm13
+	vmovdqa	XMMWORD[112+rsp],xmm13
 $L$SEH_prologue_aes_gcm_enc_update_vaes_avx2_13:
-	movdqa	XMMWORD[128+rsp],xmm14
+	vmovdqa	XMMWORD[128+rsp],xmm14
 $L$SEH_prologue_aes_gcm_enc_update_vaes_avx2_14:
-	movdqa	XMMWORD[144+rsp],xmm15
+	vmovdqa	XMMWORD[144+rsp],xmm15
 $L$SEH_prologue_aes_gcm_enc_update_vaes_avx2_15:
 
 $L$SEH_endprologue_aes_gcm_enc_update_vaes_avx2_16:
@@ -954,16 +954,16 @@
 	vmovdqu	XMMWORD[r12],xmm1
 
 	vzeroupper
-	movdqa	xmm6,XMMWORD[rsp]
-	movdqa	xmm7,XMMWORD[16+rsp]
-	movdqa	xmm8,XMMWORD[32+rsp]
-	movdqa	xmm9,XMMWORD[48+rsp]
-	movdqa	xmm10,XMMWORD[64+rsp]
-	movdqa	xmm11,XMMWORD[80+rsp]
-	movdqa	xmm12,XMMWORD[96+rsp]
-	movdqa	xmm13,XMMWORD[112+rsp]
-	movdqa	xmm14,XMMWORD[128+rsp]
-	movdqa	xmm15,XMMWORD[144+rsp]
+	vmovdqa	xmm6,XMMWORD[rsp]
+	vmovdqa	xmm7,XMMWORD[16+rsp]
+	vmovdqa	xmm8,XMMWORD[32+rsp]
+	vmovdqa	xmm9,XMMWORD[48+rsp]
+	vmovdqa	xmm10,XMMWORD[64+rsp]
+	vmovdqa	xmm11,XMMWORD[80+rsp]
+	vmovdqa	xmm12,XMMWORD[96+rsp]
+	vmovdqa	xmm13,XMMWORD[112+rsp]
+	vmovdqa	xmm14,XMMWORD[128+rsp]
+	vmovdqa	xmm15,XMMWORD[144+rsp]
 	add	rsp,160
 	pop	r12
 	pop	rdi
@@ -991,25 +991,25 @@
 	mov	r12,QWORD[80+rsp]
 	sub	rsp,160
 $L$SEH_prologue_aes_gcm_dec_update_vaes_avx2_5:
-	movdqa	XMMWORD[rsp],xmm6
+	vmovdqa	XMMWORD[rsp],xmm6
 $L$SEH_prologue_aes_gcm_dec_update_vaes_avx2_6:
-	movdqa	XMMWORD[16+rsp],xmm7
+	vmovdqa	XMMWORD[16+rsp],xmm7
 $L$SEH_prologue_aes_gcm_dec_update_vaes_avx2_7:
-	movdqa	XMMWORD[32+rsp],xmm8
+	vmovdqa	XMMWORD[32+rsp],xmm8
 $L$SEH_prologue_aes_gcm_dec_update_vaes_avx2_8:
-	movdqa	XMMWORD[48+rsp],xmm9
+	vmovdqa	XMMWORD[48+rsp],xmm9
 $L$SEH_prologue_aes_gcm_dec_update_vaes_avx2_9:
-	movdqa	XMMWORD[64+rsp],xmm10
+	vmovdqa	XMMWORD[64+rsp],xmm10
 $L$SEH_prologue_aes_gcm_dec_update_vaes_avx2_10:
-	movdqa	XMMWORD[80+rsp],xmm11
+	vmovdqa	XMMWORD[80+rsp],xmm11
 $L$SEH_prologue_aes_gcm_dec_update_vaes_avx2_11:
-	movdqa	XMMWORD[96+rsp],xmm12
+	vmovdqa	XMMWORD[96+rsp],xmm12
 $L$SEH_prologue_aes_gcm_dec_update_vaes_avx2_12:
-	movdqa	XMMWORD[112+rsp],xmm13
+	vmovdqa	XMMWORD[112+rsp],xmm13
 $L$SEH_prologue_aes_gcm_dec_update_vaes_avx2_13:
-	movdqa	XMMWORD[128+rsp],xmm14
+	vmovdqa	XMMWORD[128+rsp],xmm14
 $L$SEH_prologue_aes_gcm_dec_update_vaes_avx2_14:
-	movdqa	XMMWORD[144+rsp],xmm15
+	vmovdqa	XMMWORD[144+rsp],xmm15
 $L$SEH_prologue_aes_gcm_dec_update_vaes_avx2_15:
 
 $L$SEH_endprologue_aes_gcm_dec_update_vaes_avx2_16:
@@ -1415,16 +1415,16 @@
 	vmovdqu	XMMWORD[r12],xmm1
 
 	vzeroupper
-	movdqa	xmm6,XMMWORD[rsp]
-	movdqa	xmm7,XMMWORD[16+rsp]
-	movdqa	xmm8,XMMWORD[32+rsp]
-	movdqa	xmm9,XMMWORD[48+rsp]
-	movdqa	xmm10,XMMWORD[64+rsp]
-	movdqa	xmm11,XMMWORD[80+rsp]
-	movdqa	xmm12,XMMWORD[96+rsp]
-	movdqa	xmm13,XMMWORD[112+rsp]
-	movdqa	xmm14,XMMWORD[128+rsp]
-	movdqa	xmm15,XMMWORD[144+rsp]
+	vmovdqa	xmm6,XMMWORD[rsp]
+	vmovdqa	xmm7,XMMWORD[16+rsp]
+	vmovdqa	xmm8,XMMWORD[32+rsp]
+	vmovdqa	xmm9,XMMWORD[48+rsp]
+	vmovdqa	xmm10,XMMWORD[64+rsp]
+	vmovdqa	xmm11,XMMWORD[80+rsp]
+	vmovdqa	xmm12,XMMWORD[96+rsp]
+	vmovdqa	xmm13,XMMWORD[112+rsp]
+	vmovdqa	xmm14,XMMWORD[128+rsp]
+	vmovdqa	xmm15,XMMWORD[144+rsp]
 	add	rsp,160
 	pop	r12
 	pop	rdi