Disable the "AVX10/256" AES-GCM functions for now

Since there is now a VAES+AVX2 implementation of AES-GCM, and the future
of AVX10/256 is uncertain, disable the AES-GCM functions that use
AVX10/256 (equivalently AVX512 with a maximum vector length of 256
bits).  This leaves VAES+AVX2 as the sole 256-bit AES-GCM implementation
for now.
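
As a rough illustration of the resulting x86-64 dispatch (a minimal
sketch, not the actual gcm.cc.inc code; the cpu_caps struct and
select_aes_gcm_impl helper are hypothetical stand-ins for the CRYPTO_is_*
capability checks):

    #include <stdbool.h>
    #include <stdio.h>

    /* Hypothetical stand-ins for the CRYPTO_is_*_capable() and
     * CRYPTO_cpu_avoid_zmm_registers() checks used by the real dispatch. */
    struct cpu_caps {
      bool vpclmulqdq, avx2, avx512bw, avx512vl, bmi2, vaes, avoid_zmm;
    };

    /* Simplified model of the selection after this change: the avx10_256
     * branch is gone, so CPUs that prefer 256-bit vectors (avoid_zmm set)
     * fall through to the VAES+AVX2 implementation instead. */
    static const char *select_aes_gcm_impl(const struct cpu_caps *c) {
      if (c->vpclmulqdq && c->avx2 && c->vaes) {
        if (c->avx512bw && c->avx512vl && c->bmi2 && !c->avoid_zmm) {
          return "aes_gcm_*_vaes_avx10_512";  /* full 512-bit path */
        }
        return "aes_gcm_*_vaes_avx2";  /* the sole 256-bit path now */
      }
      return "aesni or other fallback";
    }

    int main(void) {
      /* Ice Lake / Tiger Lake: AVX512-capable, but avoid_zmm is set. */
      struct cpu_caps ice_lake = {true, true, true, true, true, true, true};
      printf("%s\n", select_aes_gcm_impl(&ice_lake));  /* the AVX2 path */
      return 0;
    }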

For now, this only affects Intel Ice Lake and Tiger Lake, which do support
AVX512 but where downclocking issues make 256-bit vectors arguably
preferable to 512-bit; on those CPUs a slight performance loss is seen on
long messages.  The following tables compare AES-256-GCM throughput in MB/s
on an Ice Lake server for various message lengths in bytes:

Encryption:

            | 16384 |  4096 |  4095 |  1420 |   512 |   500 |
    --------+-------+-------+-------+-------+-------+-------+
    Before  |  7533 |  6990 |  6220 |  5096 |  4200 |  2702 |
    After   |  7403 |  6879 |  6236 |  4980 |  4040 |  2868 |

            |   300 |   200 |    64 |    63 |    16 |
    --------+-------+-------+-------+-------+-------+
    Before  |  2086 |  1555 |  1031 |   657 |   433 |
    After   |  2069 |  1635 |  1045 |   667 |   430 |

Decryption:

            | 16384 |  4096 |  4095 |  1420 |   512 |   500 |
    --------+-------+-------+-------+-------+-------+-------+
    Before  |  7703 |  7140 |  6524 |  5283 |  4244 |  2990 |
    After   |  7572 |  7056 |  6494 |  5155 |  4224 |  3073 |

            |   300 |   200 |    64 |    63 |    16 |
    --------+-------+-------+-------+-------+-------+
    Before  |  2276 |  1733 |  1070 |   680 |   447 |
    After   |  2249 |  1743 |  1100 |   692 |   447 |
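
The exact harness behind these measurements isn't part of this change.  As
a rough illustration only, a minimal standalone sketch of measuring seal
throughput for one message length through BoringSSL's public EVP_AEAD API
could look like this (the iteration count, timing via clock(), and overall
structure are assumptions, not the tool actually used):

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <time.h>

    #include <openssl/aead.h>

    /* Measures AES-256-GCM seal throughput in MB/s for one message length.
     * Illustrative only: a real benchmark would pin CPU frequency, warm up,
     * and use a higher-resolution clock than clock(). */
    static double seal_mb_per_sec(size_t msg_len, int iters) {
      static const uint8_t key[32] = {0};
      static const uint8_t nonce[12] = {0};
      static uint8_t in[16384];
      static uint8_t out[16384 + 64];
      EVP_AEAD_CTX *ctx = EVP_AEAD_CTX_new(EVP_aead_aes_256_gcm(), key,
                                           sizeof(key),
                                           EVP_AEAD_DEFAULT_TAG_LENGTH);
      if (ctx == NULL) {
        return 0.0;
      }
      size_t out_len;
      clock_t start = clock();
      for (int i = 0; i < iters; i++) {
        EVP_AEAD_CTX_seal(ctx, out, &out_len, sizeof(out), nonce,
                          sizeof(nonce), in, msg_len, NULL, 0);
      }
      double secs = (double)(clock() - start) / CLOCKS_PER_SEC;
      EVP_AEAD_CTX_free(ctx);
      return (double)msg_len * iters / (secs * 1e6);
    }

    int main(void) {
      static const size_t lens[] = {16384, 4096, 1420, 512, 64, 16};
      for (size_t i = 0; i < sizeof(lens) / sizeof(lens[0]); i++) {
        printf("%5zu bytes: %.0f MB/s\n", lens[i],
               seal_mb_per_sec(lens[i], 100000));
      }
      return 0;
    }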

This change should be reconsidered if AVX10/256 sees widespread support,
as we shouldn't unnecessarily keep 256-bit support restricted to the AVX2
feature set.

This change also replaces gcm_init_vpclmulqdq_avx10 with
gcm_init_vpclmulqdq_avx10_512, which is now instantiated using 512-bit
vectors; otherwise it would be the only avx10 function left using 256-bit
vectors.

Change-Id: I7fd21568482118a2ce7a382e9042b187cd2739f7
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/74369
Reviewed-by: David Benjamin <davidben@google.com>
Commit-Queue: David Benjamin <davidben@google.com>
diff --git a/crypto/fipsmodule/modes/asm/aes-gcm-avx10-x86_64.pl b/crypto/fipsmodule/modes/asm/aes-gcm-avx10-x86_64.pl
index 269ac80..06ea7e6 100644
--- a/crypto/fipsmodule/modes/asm/aes-gcm-avx10-x86_64.pl
+++ b/crypto/fipsmodule/modes/asm/aes-gcm-avx10-x86_64.pl
@@ -1321,26 +1321,31 @@
 }
 $code .= _end_func;
 
-_set_veclen 32;
-
-$code .= _begin_func "gcm_init_vpclmulqdq_avx10", 0;
-$code .= _aes_gcm_init;
-$code .= _end_func;
-
-$code .= _begin_func "gcm_ghash_vpclmulqdq_avx10_256", 1;
-$code .= _ghash_update;
-$code .= _end_func;
-
-$code .= _begin_func "aes_gcm_enc_update_vaes_avx10_256", 1;
-$code .= _aes_gcm_update 1;
-$code .= _end_func;
-
-$code .= _begin_func "aes_gcm_dec_update_vaes_avx10_256", 1;
-$code .= _aes_gcm_update 0;
-$code .= _end_func;
+# Disabled until significant deployment of AVX10/256 is seen.  The separate
+# *_vaes_avx2 implementation provides the only 256-bit support for now.
+#
+# $code .= _begin_func "gcm_init_vpclmulqdq_avx10_256", 0;
+# $code .= _aes_gcm_init;
+# $code .= _end_func;
+#
+# $code .= _begin_func "gcm_ghash_vpclmulqdq_avx10_256", 1;
+# $code .= _ghash_update;
+# $code .= _end_func;
+#
+# $code .= _begin_func "aes_gcm_enc_update_vaes_avx10_256", 1;
+# $code .= _aes_gcm_update 1;
+# $code .= _end_func;
+#
+# $code .= _begin_func "aes_gcm_dec_update_vaes_avx10_256", 1;
+# $code .= _aes_gcm_update 0;
+# $code .= _end_func;
 
 _set_veclen 64;
 
+$code .= _begin_func "gcm_init_vpclmulqdq_avx10_512", 0;
+$code .= _aes_gcm_init;
+$code .= _end_func;
+
 $code .= _begin_func "gcm_ghash_vpclmulqdq_avx10_512", 1;
 $code .= _ghash_update;
 $code .= _end_func;
diff --git a/crypto/fipsmodule/modes/gcm.cc.inc b/crypto/fipsmodule/modes/gcm.cc.inc
index e77c525..d8ccf00 100644
--- a/crypto/fipsmodule/modes/gcm.cc.inc
+++ b/crypto/fipsmodule/modes/gcm.cc.inc
@@ -104,11 +104,6 @@
       aes_gcm_enc_update_vaes_avx2(in, out, len, key, ivec, Htable, Xi);
       CRYPTO_store_u32_be(&ivec[12], CRYPTO_load_u32_be(&ivec[12]) + len / 16);
       return len;
-    case gcm_x86_vaes_avx10_256:
-      len &= kSizeTWithoutLower4Bits;
-      aes_gcm_enc_update_vaes_avx10_256(in, out, len, key, ivec, Htable, Xi);
-      CRYPTO_store_u32_be(&ivec[12], CRYPTO_load_u32_be(&ivec[12]) + len / 16);
-      return len;
     case gcm_x86_vaes_avx10_512:
       len &= kSizeTWithoutLower4Bits;
       aes_gcm_enc_update_vaes_avx10_512(in, out, len, key, ivec, Htable, Xi);
@@ -129,11 +124,6 @@
       aes_gcm_dec_update_vaes_avx2(in, out, len, key, ivec, Htable, Xi);
       CRYPTO_store_u32_be(&ivec[12], CRYPTO_load_u32_be(&ivec[12]) + len / 16);
       return len;
-    case gcm_x86_vaes_avx10_256:
-      len &= kSizeTWithoutLower4Bits;
-      aes_gcm_dec_update_vaes_avx10_256(in, out, len, key, ivec, Htable, Xi);
-      CRYPTO_store_u32_be(&ivec[12], CRYPTO_load_u32_be(&ivec[12]) + len / 16);
-      return len;
     case gcm_x86_vaes_avx10_512:
       len &= kSizeTWithoutLower4Bits;
       aes_gcm_dec_update_vaes_avx10_512(in, out, len, key, ivec, Htable, Xi);
@@ -183,14 +173,10 @@
   if (crypto_gcm_clmul_enabled()) {
     if (CRYPTO_is_VPCLMULQDQ_capable() && CRYPTO_is_AVX2_capable()) {
       if (CRYPTO_is_AVX512BW_capable() && CRYPTO_is_AVX512VL_capable() &&
-          CRYPTO_is_BMI2_capable()) {
-        gcm_init_vpclmulqdq_avx10(out_table, H);
+          CRYPTO_is_BMI2_capable() && !CRYPTO_cpu_avoid_zmm_registers()) {
+        gcm_init_vpclmulqdq_avx10_512(out_table, H);
         *out_mult = gcm_gmult_vpclmulqdq_avx10;
-        if (CRYPTO_cpu_avoid_zmm_registers()) {
-          *out_hash = gcm_ghash_vpclmulqdq_avx10_256;
-        } else {
-          *out_hash = gcm_ghash_vpclmulqdq_avx10_512;
-        }
+        *out_hash = gcm_ghash_vpclmulqdq_avx10_512;
         return;
       }
       gcm_init_vpclmulqdq_avx2(out_table, H);
@@ -275,11 +261,8 @@
 
 #if !defined(OPENSSL_NO_ASM)
 #if defined(OPENSSL_X86_64)
-  if (gcm_key->ghash == gcm_ghash_vpclmulqdq_avx10_256 &&
+  if (gcm_key->ghash == gcm_ghash_vpclmulqdq_avx10_512 &&
       CRYPTO_is_VAES_capable()) {
-    gcm_key->impl = gcm_x86_vaes_avx10_256;
-  } else if (gcm_key->ghash == gcm_ghash_vpclmulqdq_avx10_512 &&
-             CRYPTO_is_VAES_capable()) {
     gcm_key->impl = gcm_x86_vaes_avx10_512;
   } else if (gcm_key->ghash == gcm_ghash_vpclmulqdq_avx2 &&
              CRYPTO_is_VAES_capable()) {
diff --git a/crypto/fipsmodule/modes/gcm_test.cc b/crypto/fipsmodule/modes/gcm_test.cc
index d195526..6329675 100644
--- a/crypto/fipsmodule/modes/gcm_test.cc
+++ b/crypto/fipsmodule/modes/gcm_test.cc
@@ -111,21 +111,15 @@
       static const uint8_t kKey[16] = {0};
       uint8_t iv[16] = {0};
 
-      CHECK_ABI_SEH(gcm_init_vpclmulqdq_avx10, Htable, kH);
+      CHECK_ABI_SEH(gcm_init_vpclmulqdq_avx10_512, Htable, kH);
       CHECK_ABI_SEH(gcm_gmult_vpclmulqdq_avx10, X, Htable);
       for (size_t blocks : kBlockCounts) {
-        CHECK_ABI_SEH(gcm_ghash_vpclmulqdq_avx10_256, X, Htable, buf,
-                      16 * blocks);
         CHECK_ABI_SEH(gcm_ghash_vpclmulqdq_avx10_512, X, Htable, buf,
                       16 * blocks);
       }
 
       aes_hw_set_encrypt_key(kKey, 128, &aes_key);
       for (size_t blocks : kBlockCounts) {
-        CHECK_ABI_SEH(aes_gcm_enc_update_vaes_avx10_256, buf, buf, blocks * 16,
-                      &aes_key, iv, Htable, X);
-        CHECK_ABI_SEH(aes_gcm_enc_update_vaes_avx10_256, buf, buf,
-                      blocks * 16 + 7, &aes_key, iv, Htable, X);
         CHECK_ABI_SEH(aes_gcm_enc_update_vaes_avx10_512, buf, buf, blocks * 16,
                       &aes_key, iv, Htable, X);
         CHECK_ABI_SEH(aes_gcm_enc_update_vaes_avx10_512, buf, buf,
@@ -133,10 +127,6 @@
       }
       aes_hw_set_decrypt_key(kKey, 128, &aes_key);
       for (size_t blocks : kBlockCounts) {
-        CHECK_ABI_SEH(aes_gcm_dec_update_vaes_avx10_256, buf, buf, blocks * 16,
-                      &aes_key, iv, Htable, X);
-        CHECK_ABI_SEH(aes_gcm_dec_update_vaes_avx10_256, buf, buf,
-                      blocks * 16 + 7, &aes_key, iv, Htable, X);
         CHECK_ABI_SEH(aes_gcm_dec_update_vaes_avx10_512, buf, buf, blocks * 16,
                       &aes_key, iv, Htable, X);
         CHECK_ABI_SEH(aes_gcm_dec_update_vaes_avx10_512, buf, buf,
diff --git a/crypto/fipsmodule/modes/internal.h b/crypto/fipsmodule/modes/internal.h
index f041bf8..7a6e9aa 100644
--- a/crypto/fipsmodule/modes/internal.h
+++ b/crypto/fipsmodule/modes/internal.h
@@ -70,7 +70,6 @@
   gcm_separate = 0,  // No combined AES-GCM, but may have AES-CTR and GHASH.
   gcm_x86_aesni,
   gcm_x86_vaes_avx2,
-  gcm_x86_vaes_avx10_256,
   gcm_x86_vaes_avx10_512,
   gcm_arm64_aes,
 };
@@ -212,20 +211,10 @@
                                   const AES_KEY *key, const uint8_t ivec[16],
                                   const u128 Htable[16], uint8_t Xi[16]);
 
-void gcm_init_vpclmulqdq_avx10(u128 Htable[16], const uint64_t H[2]);
+void gcm_init_vpclmulqdq_avx10_512(u128 Htable[16], const uint64_t H[2]);
 void gcm_gmult_vpclmulqdq_avx10(uint8_t Xi[16], const u128 Htable[16]);
-void gcm_ghash_vpclmulqdq_avx10_256(uint8_t Xi[16], const u128 Htable[16],
-                                    const uint8_t *in, size_t len);
 void gcm_ghash_vpclmulqdq_avx10_512(uint8_t Xi[16], const u128 Htable[16],
                                     const uint8_t *in, size_t len);
-void aes_gcm_enc_update_vaes_avx10_256(const uint8_t *in, uint8_t *out,
-                                       size_t len, const AES_KEY *key,
-                                       const uint8_t ivec[16],
-                                       const u128 Htable[16], uint8_t Xi[16]);
-void aes_gcm_dec_update_vaes_avx10_256(const uint8_t *in, uint8_t *out,
-                                       size_t len, const AES_KEY *key,
-                                       const uint8_t ivec[16],
-                                       const u128 Htable[16], uint8_t Xi[16]);
 void aes_gcm_enc_update_vaes_avx10_512(const uint8_t *in, uint8_t *out,
                                        size_t len, const AES_KEY *key,
                                        const uint8_t ivec[16],
diff --git a/crypto/impl_dispatch_test.cc b/crypto/impl_dispatch_test.cc
index bfd0045..26913c7 100644
--- a/crypto/impl_dispatch_test.cc
+++ b/crypto/impl_dispatch_test.cc
@@ -95,7 +95,6 @@
 constexpr size_t kFlag_aes_hw_set_encrypt_key = 3;
 constexpr size_t kFlag_vpaes_encrypt = 4;
 constexpr size_t kFlag_vpaes_set_encrypt_key = 5;
-constexpr size_t kFlag_aes_gcm_enc_update_vaes_avx10_256 = 6;
 constexpr size_t kFlag_aes_gcm_enc_update_vaes_avx10_512 = 7;
 constexpr size_t kFlag_aes_gcm_enc_update_vaes_avx2 = 8;
 
@@ -109,11 +108,10 @@
            is_x86_64_ && aesni_ && avx_movbe_ && !vaes_},
           {kFlag_vpaes_encrypt, ssse3_ && !aesni_},
           {kFlag_vpaes_set_encrypt_key, ssse3_ && !aesni_},
-          {kFlag_aes_gcm_enc_update_vaes_avx10_256,
-           is_x86_64_ && vaes_ && avx10_ && avoid_zmm_},
           {kFlag_aes_gcm_enc_update_vaes_avx10_512,
            is_x86_64_ && vaes_ && avx10_ && !avoid_zmm_},
-          {kFlag_aes_gcm_enc_update_vaes_avx2, is_x86_64_ && vaes_ && !avx10_},
+          {kFlag_aes_gcm_enc_update_vaes_avx2,
+           is_x86_64_ && vaes_ && !(avx10_ && !avoid_zmm_)},
       },
       [] {
         const uint8_t kZeros[16] = {0};
diff --git a/crypto/internal.h b/crypto/internal.h
index 62273c6..5ebfaff 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -1408,7 +1408,7 @@
 //   3: aes_hw_set_encrypt_key
 //   4: vpaes_encrypt
 //   5: vpaes_set_encrypt_key
-//   6: aes_gcm_enc_update_vaes_avx10_256
+//   6: aes_gcm_enc_update_vaes_avx10_256 [reserved]
 //   7: aes_gcm_enc_update_vaes_avx10_512
 //   8: aes_gcm_enc_update_vaes_avx2
 extern uint8_t BORINGSSL_function_hit[9];
diff --git a/gen/bcm/aes-gcm-avx10-x86_64-apple.S b/gen/bcm/aes-gcm-avx10-x86_64-apple.S
index b75bb07..54fcde0 100644
--- a/gen/bcm/aes-gcm-avx10-x86_64-apple.S
+++ b/gen/bcm/aes-gcm-avx10-x86_64-apple.S
@@ -75,16 +75,16 @@
 
 
 
-.globl	_gcm_init_vpclmulqdq_avx10
-.private_extern _gcm_init_vpclmulqdq_avx10
+.globl	_gcm_init_vpclmulqdq_avx10_512
+.private_extern _gcm_init_vpclmulqdq_avx10_512
 
 .p2align	5
-_gcm_init_vpclmulqdq_avx10:
+_gcm_init_vpclmulqdq_avx10_512:
 
 
 _CET_ENDBR
 
-	leaq	256-32(%rdi),%r8
+	leaq	256-64(%rdi),%r8
 
 
 
@@ -112,7 +112,7 @@
 	vpternlogd	$0x78,L$gfpoly_and_internal_carrybit(%rip),%xmm0,%xmm3
 
 
-	vbroadcasti32x4	L$gfpoly(%rip),%ymm5
+	vbroadcasti32x4	L$gfpoly(%rip),%zmm5
 
 
 
@@ -137,16 +137,6 @@
 
 	vinserti128	$1,%xmm3,%ymm4,%ymm3
 	vinserti128	$1,%xmm4,%ymm4,%ymm4
-
-	vmovdqu8	%ymm3,(%r8)
-
-
-
-
-
-	movl	$7,%eax
-L$precompute_next__func1:
-	subq	$32,%r8
 	vpclmulqdq	$0x00,%ymm4,%ymm3,%ymm0
 	vpclmulqdq	$0x01,%ymm4,%ymm3,%ymm1
 	vpclmulqdq	$0x10,%ymm4,%ymm3,%ymm2
@@ -154,12 +144,36 @@
 	vpclmulqdq	$0x01,%ymm0,%ymm5,%ymm2
 	vpshufd	$0x4e,%ymm0,%ymm0
 	vpternlogd	$0x96,%ymm2,%ymm0,%ymm1
-	vpclmulqdq	$0x11,%ymm4,%ymm3,%ymm3
+	vpclmulqdq	$0x11,%ymm4,%ymm3,%ymm4
 	vpclmulqdq	$0x01,%ymm1,%ymm5,%ymm0
 	vpshufd	$0x4e,%ymm1,%ymm1
-	vpternlogd	$0x96,%ymm0,%ymm1,%ymm3
+	vpternlogd	$0x96,%ymm0,%ymm1,%ymm4
 
-	vmovdqu8	%ymm3,(%r8)
+	vinserti64x4	$1,%ymm3,%zmm4,%zmm3
+	vshufi64x2	$0,%zmm4,%zmm4,%zmm4
+
+	vmovdqu8	%zmm3,(%r8)
+
+
+
+
+
+	movl	$3,%eax
+L$precompute_next__func1:
+	subq	$64,%r8
+	vpclmulqdq	$0x00,%zmm4,%zmm3,%zmm0
+	vpclmulqdq	$0x01,%zmm4,%zmm3,%zmm1
+	vpclmulqdq	$0x10,%zmm4,%zmm3,%zmm2
+	vpxord	%zmm2,%zmm1,%zmm1
+	vpclmulqdq	$0x01,%zmm0,%zmm5,%zmm2
+	vpshufd	$0x4e,%zmm0,%zmm0
+	vpternlogd	$0x96,%zmm2,%zmm0,%zmm1
+	vpclmulqdq	$0x11,%zmm4,%zmm3,%zmm3
+	vpclmulqdq	$0x01,%zmm1,%zmm5,%zmm0
+	vpshufd	$0x4e,%zmm1,%zmm1
+	vpternlogd	$0x96,%zmm0,%zmm1,%zmm3
+
+	vmovdqu8	%zmm3,(%r8)
 	decl	%eax
 	jnz	L$precompute_next__func1
 
@@ -168,1042 +182,6 @@
 
 
 
-.globl	_gcm_ghash_vpclmulqdq_avx10_256
-.private_extern _gcm_ghash_vpclmulqdq_avx10_256
-
-.p2align	5
-_gcm_ghash_vpclmulqdq_avx10_256:
-
-
-_CET_ENDBR
-
-
-
-
-
-
-	vmovdqu	L$bswap_mask(%rip),%xmm4
-	vmovdqu	L$gfpoly(%rip),%xmm10
-
-
-	vmovdqu	(%rdi),%xmm5
-	vpshufb	%xmm4,%xmm5,%xmm5
-
-
-	cmpq	$32,%rcx
-	jb	L$aad_blockbyblock__func1
-
-
-
-	vshufi64x2	$0,%ymm4,%ymm4,%ymm4
-	vshufi64x2	$0,%ymm10,%ymm10,%ymm10
-
-
-	vmovdqu8	256-32(%rsi),%ymm9
-
-	cmpq	$128-1,%rcx
-	jbe	L$aad_loop_1x__func1
-
-
-	vmovdqu8	256-128(%rsi),%ymm6
-	vmovdqu8	256-96(%rsi),%ymm7
-	vmovdqu8	256-64(%rsi),%ymm8
-
-
-L$aad_loop_4x__func1:
-	vmovdqu8	0(%rdx),%ymm0
-	vmovdqu8	32(%rdx),%ymm1
-	vmovdqu8	64(%rdx),%ymm2
-	vmovdqu8	96(%rdx),%ymm3
-	vpshufb	%ymm4,%ymm0,%ymm0
-	vpxord	%ymm5,%ymm0,%ymm0
-	vpshufb	%ymm4,%ymm1,%ymm1
-	vpshufb	%ymm4,%ymm2,%ymm2
-	vpshufb	%ymm4,%ymm3,%ymm3
-	vpclmulqdq	$0x00,%ymm6,%ymm0,%ymm5
-	vpclmulqdq	$0x00,%ymm7,%ymm1,%ymm11
-	vpclmulqdq	$0x00,%ymm8,%ymm2,%ymm12
-	vpxord	%ymm11,%ymm5,%ymm5
-	vpclmulqdq	$0x00,%ymm9,%ymm3,%ymm13
-	vpternlogd	$0x96,%ymm13,%ymm12,%ymm5
-	vpclmulqdq	$0x01,%ymm6,%ymm0,%ymm11
-	vpclmulqdq	$0x01,%ymm7,%ymm1,%ymm12
-	vpclmulqdq	$0x01,%ymm8,%ymm2,%ymm13
-	vpternlogd	$0x96,%ymm13,%ymm12,%ymm11
-	vpclmulqdq	$0x01,%ymm9,%ymm3,%ymm12
-	vpclmulqdq	$0x10,%ymm6,%ymm0,%ymm13
-	vpternlogd	$0x96,%ymm13,%ymm12,%ymm11
-	vpclmulqdq	$0x10,%ymm7,%ymm1,%ymm12
-	vpclmulqdq	$0x10,%ymm8,%ymm2,%ymm13
-	vpternlogd	$0x96,%ymm13,%ymm12,%ymm11
-	vpclmulqdq	$0x01,%ymm5,%ymm10,%ymm13
-	vpclmulqdq	$0x10,%ymm9,%ymm3,%ymm12
-	vpxord	%ymm12,%ymm11,%ymm11
-	vpshufd	$0x4e,%ymm5,%ymm5
-	vpclmulqdq	$0x11,%ymm6,%ymm0,%ymm0
-	vpclmulqdq	$0x11,%ymm7,%ymm1,%ymm1
-	vpclmulqdq	$0x11,%ymm8,%ymm2,%ymm2
-	vpternlogd	$0x96,%ymm13,%ymm5,%ymm11
-	vpclmulqdq	$0x11,%ymm9,%ymm3,%ymm3
-	vpternlogd	$0x96,%ymm2,%ymm1,%ymm0
-	vpclmulqdq	$0x01,%ymm11,%ymm10,%ymm12
-	vpxord	%ymm3,%ymm0,%ymm5
-	vpshufd	$0x4e,%ymm11,%ymm11
-	vpternlogd	$0x96,%ymm12,%ymm11,%ymm5
-	vextracti32x4	$1,%ymm5,%xmm0
-	vpxord	%xmm0,%xmm5,%xmm5
-
-	subq	$-128,%rdx
-	addq	$-128,%rcx
-	cmpq	$128-1,%rcx
-	ja	L$aad_loop_4x__func1
-
-
-	cmpq	$32,%rcx
-	jb	L$aad_large_done__func1
-L$aad_loop_1x__func1:
-	vmovdqu8	(%rdx),%ymm0
-	vpshufb	%ymm4,%ymm0,%ymm0
-	vpxord	%ymm0,%ymm5,%ymm5
-	vpclmulqdq	$0x00,%ymm9,%ymm5,%ymm0
-	vpclmulqdq	$0x01,%ymm9,%ymm5,%ymm1
-	vpclmulqdq	$0x10,%ymm9,%ymm5,%ymm2
-	vpxord	%ymm2,%ymm1,%ymm1
-	vpclmulqdq	$0x01,%ymm0,%ymm10,%ymm2
-	vpshufd	$0x4e,%ymm0,%ymm0
-	vpternlogd	$0x96,%ymm2,%ymm0,%ymm1
-	vpclmulqdq	$0x11,%ymm9,%ymm5,%ymm5
-	vpclmulqdq	$0x01,%ymm1,%ymm10,%ymm0
-	vpshufd	$0x4e,%ymm1,%ymm1
-	vpternlogd	$0x96,%ymm0,%ymm1,%ymm5
-
-	vextracti32x4	$1,%ymm5,%xmm0
-	vpxord	%xmm0,%xmm5,%xmm5
-
-	addq	$32,%rdx
-	subq	$32,%rcx
-	cmpq	$32,%rcx
-	jae	L$aad_loop_1x__func1
-
-L$aad_large_done__func1:
-
-
-	vzeroupper
-
-
-L$aad_blockbyblock__func1:
-	testq	%rcx,%rcx
-	jz	L$aad_done__func1
-	vmovdqu	256-16(%rsi),%xmm9
-L$aad_loop_blockbyblock__func1:
-	vmovdqu	(%rdx),%xmm0
-	vpshufb	%xmm4,%xmm0,%xmm0
-	vpxor	%xmm0,%xmm5,%xmm5
-	vpclmulqdq	$0x00,%xmm9,%xmm5,%xmm0
-	vpclmulqdq	$0x01,%xmm9,%xmm5,%xmm1
-	vpclmulqdq	$0x10,%xmm9,%xmm5,%xmm2
-	vpxord	%xmm2,%xmm1,%xmm1
-	vpclmulqdq	$0x01,%xmm0,%xmm10,%xmm2
-	vpshufd	$0x4e,%xmm0,%xmm0
-	vpternlogd	$0x96,%xmm2,%xmm0,%xmm1
-	vpclmulqdq	$0x11,%xmm9,%xmm5,%xmm5
-	vpclmulqdq	$0x01,%xmm1,%xmm10,%xmm0
-	vpshufd	$0x4e,%xmm1,%xmm1
-	vpternlogd	$0x96,%xmm0,%xmm1,%xmm5
-
-	addq	$16,%rdx
-	subq	$16,%rcx
-	jnz	L$aad_loop_blockbyblock__func1
-
-L$aad_done__func1:
-
-	vpshufb	%xmm4,%xmm5,%xmm5
-	vmovdqu	%xmm5,(%rdi)
-	ret
-
-
-
-.globl	_aes_gcm_enc_update_vaes_avx10_256
-.private_extern _aes_gcm_enc_update_vaes_avx10_256
-
-.p2align	5
-_aes_gcm_enc_update_vaes_avx10_256:
-
-
-_CET_ENDBR
-	pushq	%r12
-
-
-	movq	16(%rsp),%r12
-#ifdef BORINGSSL_DISPATCH_TEST
-
-	movb	$1,_BORINGSSL_function_hit+6(%rip)
-#endif
-
-	vbroadcasti32x4	L$bswap_mask(%rip),%ymm8
-	vbroadcasti32x4	L$gfpoly(%rip),%ymm31
-
-
-
-	vmovdqu	(%r12),%xmm10
-	vpshufb	%xmm8,%xmm10,%xmm10
-	vbroadcasti32x4	(%r8),%ymm12
-	vpshufb	%ymm8,%ymm12,%ymm12
-
-
-
-	movl	240(%rcx),%r10d
-	leal	-20(,%r10,4),%r10d
-
-
-
-
-	leaq	96(%rcx,%r10,4),%r11
-	vbroadcasti32x4	(%rcx),%ymm13
-	vbroadcasti32x4	(%r11),%ymm14
-
-
-	vpaddd	L$ctr_pattern(%rip),%ymm12,%ymm12
-
-
-	vbroadcasti32x4	L$inc_2blocks(%rip),%ymm11
-
-
-
-	cmpq	$128-1,%rdx
-	jbe	L$crypt_loop_4x_done__func1
-
-
-	vmovdqu8	256-128(%r9),%ymm27
-	vmovdqu8	256-96(%r9),%ymm28
-	vmovdqu8	256-64(%r9),%ymm29
-	vmovdqu8	256-32(%r9),%ymm30
-
-
-
-
-	vpshufb	%ymm8,%ymm12,%ymm0
-	vpaddd	%ymm11,%ymm12,%ymm12
-	vpshufb	%ymm8,%ymm12,%ymm1
-	vpaddd	%ymm11,%ymm12,%ymm12
-	vpshufb	%ymm8,%ymm12,%ymm2
-	vpaddd	%ymm11,%ymm12,%ymm12
-	vpshufb	%ymm8,%ymm12,%ymm3
-	vpaddd	%ymm11,%ymm12,%ymm12
-
-
-	vpxord	%ymm13,%ymm0,%ymm0
-	vpxord	%ymm13,%ymm1,%ymm1
-	vpxord	%ymm13,%ymm2,%ymm2
-	vpxord	%ymm13,%ymm3,%ymm3
-
-	leaq	16(%rcx),%rax
-L$vaesenc_loop_first_4_vecs__func1:
-	vbroadcasti32x4	(%rax),%ymm9
-	vaesenc	%ymm9,%ymm0,%ymm0
-	vaesenc	%ymm9,%ymm1,%ymm1
-	vaesenc	%ymm9,%ymm2,%ymm2
-	vaesenc	%ymm9,%ymm3,%ymm3
-
-	addq	$16,%rax
-	cmpq	%rax,%r11
-	jne	L$vaesenc_loop_first_4_vecs__func1
-
-
-
-	vpxord	0(%rdi),%ymm14,%ymm4
-	vpxord	32(%rdi),%ymm14,%ymm5
-	vpxord	64(%rdi),%ymm14,%ymm6
-	vpxord	96(%rdi),%ymm14,%ymm7
-
-
-
-	vaesenclast	%ymm4,%ymm0,%ymm4
-	vaesenclast	%ymm5,%ymm1,%ymm5
-	vaesenclast	%ymm6,%ymm2,%ymm6
-	vaesenclast	%ymm7,%ymm3,%ymm7
-
-
-	vmovdqu8	%ymm4,0(%rsi)
-	vmovdqu8	%ymm5,32(%rsi)
-	vmovdqu8	%ymm6,64(%rsi)
-	vmovdqu8	%ymm7,96(%rsi)
-
-	subq	$-128,%rdi
-	subq	$-128,%rsi
-	addq	$-128,%rdx
-	cmpq	$128-1,%rdx
-	jbe	L$ghash_last_ciphertext_4x__func1
-	vbroadcasti32x4	-144(%r11),%ymm15
-	vbroadcasti32x4	-128(%r11),%ymm16
-	vbroadcasti32x4	-112(%r11),%ymm17
-	vbroadcasti32x4	-96(%r11),%ymm18
-	vbroadcasti32x4	-80(%r11),%ymm19
-	vbroadcasti32x4	-64(%r11),%ymm20
-	vbroadcasti32x4	-48(%r11),%ymm21
-	vbroadcasti32x4	-32(%r11),%ymm22
-	vbroadcasti32x4	-16(%r11),%ymm23
-L$crypt_loop_4x__func1:
-
-
-
-	vpshufb	%ymm8,%ymm12,%ymm0
-	vpaddd	%ymm11,%ymm12,%ymm12
-	vpshufb	%ymm8,%ymm12,%ymm1
-	vpaddd	%ymm11,%ymm12,%ymm12
-	vpshufb	%ymm8,%ymm12,%ymm2
-	vpaddd	%ymm11,%ymm12,%ymm12
-	vpshufb	%ymm8,%ymm12,%ymm3
-	vpaddd	%ymm11,%ymm12,%ymm12
-
-
-	vpxord	%ymm13,%ymm0,%ymm0
-	vpxord	%ymm13,%ymm1,%ymm1
-	vpxord	%ymm13,%ymm2,%ymm2
-	vpxord	%ymm13,%ymm3,%ymm3
-
-	cmpl	$24,%r10d
-	jl	L$aes128__func1
-	je	L$aes192__func1
-
-	vbroadcasti32x4	-208(%r11),%ymm9
-	vaesenc	%ymm9,%ymm0,%ymm0
-	vaesenc	%ymm9,%ymm1,%ymm1
-	vaesenc	%ymm9,%ymm2,%ymm2
-	vaesenc	%ymm9,%ymm3,%ymm3
-
-	vbroadcasti32x4	-192(%r11),%ymm9
-	vaesenc	%ymm9,%ymm0,%ymm0
-	vaesenc	%ymm9,%ymm1,%ymm1
-	vaesenc	%ymm9,%ymm2,%ymm2
-	vaesenc	%ymm9,%ymm3,%ymm3
-
-L$aes192__func1:
-	vbroadcasti32x4	-176(%r11),%ymm9
-	vaesenc	%ymm9,%ymm0,%ymm0
-	vaesenc	%ymm9,%ymm1,%ymm1
-	vaesenc	%ymm9,%ymm2,%ymm2
-	vaesenc	%ymm9,%ymm3,%ymm3
-
-	vbroadcasti32x4	-160(%r11),%ymm9
-	vaesenc	%ymm9,%ymm0,%ymm0
-	vaesenc	%ymm9,%ymm1,%ymm1
-	vaesenc	%ymm9,%ymm2,%ymm2
-	vaesenc	%ymm9,%ymm3,%ymm3
-
-L$aes128__func1:
-	vpshufb	%ymm8,%ymm4,%ymm4
-	vpxord	%ymm10,%ymm4,%ymm4
-	vpshufb	%ymm8,%ymm5,%ymm5
-	vpshufb	%ymm8,%ymm6,%ymm6
-
-	vaesenc	%ymm15,%ymm0,%ymm0
-	vaesenc	%ymm15,%ymm1,%ymm1
-	vaesenc	%ymm15,%ymm2,%ymm2
-	vaesenc	%ymm15,%ymm3,%ymm3
-
-	vpshufb	%ymm8,%ymm7,%ymm7
-	vpclmulqdq	$0x00,%ymm27,%ymm4,%ymm10
-	vpclmulqdq	$0x00,%ymm28,%ymm5,%ymm24
-	vpclmulqdq	$0x00,%ymm29,%ymm6,%ymm25
-
-	vaesenc	%ymm16,%ymm0,%ymm0
-	vaesenc	%ymm16,%ymm1,%ymm1
-	vaesenc	%ymm16,%ymm2,%ymm2
-	vaesenc	%ymm16,%ymm3,%ymm3
-
-	vpxord	%ymm24,%ymm10,%ymm10
-	vpclmulqdq	$0x00,%ymm30,%ymm7,%ymm26
-	vpternlogd	$0x96,%ymm26,%ymm25,%ymm10
-	vpclmulqdq	$0x01,%ymm27,%ymm4,%ymm24
-
-	vaesenc	%ymm17,%ymm0,%ymm0
-	vaesenc	%ymm17,%ymm1,%ymm1
-	vaesenc	%ymm17,%ymm2,%ymm2
-	vaesenc	%ymm17,%ymm3,%ymm3
-
-	vpclmulqdq	$0x01,%ymm28,%ymm5,%ymm25
-	vpclmulqdq	$0x01,%ymm29,%ymm6,%ymm26
-	vpternlogd	$0x96,%ymm26,%ymm25,%ymm24
-	vpclmulqdq	$0x01,%ymm30,%ymm7,%ymm25
-
-	vaesenc	%ymm18,%ymm0,%ymm0
-	vaesenc	%ymm18,%ymm1,%ymm1
-	vaesenc	%ymm18,%ymm2,%ymm2
-	vaesenc	%ymm18,%ymm3,%ymm3
-
-	vpclmulqdq	$0x10,%ymm27,%ymm4,%ymm26
-	vpternlogd	$0x96,%ymm26,%ymm25,%ymm24
-	vpclmulqdq	$0x10,%ymm28,%ymm5,%ymm25
-	vpclmulqdq	$0x10,%ymm29,%ymm6,%ymm26
-
-	vaesenc	%ymm19,%ymm0,%ymm0
-	vaesenc	%ymm19,%ymm1,%ymm1
-	vaesenc	%ymm19,%ymm2,%ymm2
-	vaesenc	%ymm19,%ymm3,%ymm3
-
-	vpternlogd	$0x96,%ymm26,%ymm25,%ymm24
-	vpclmulqdq	$0x01,%ymm10,%ymm31,%ymm26
-	vpclmulqdq	$0x10,%ymm30,%ymm7,%ymm25
-	vpxord	%ymm25,%ymm24,%ymm24
-
-	vaesenc	%ymm20,%ymm0,%ymm0
-	vaesenc	%ymm20,%ymm1,%ymm1
-	vaesenc	%ymm20,%ymm2,%ymm2
-	vaesenc	%ymm20,%ymm3,%ymm3
-
-	vpshufd	$0x4e,%ymm10,%ymm10
-	vpclmulqdq	$0x11,%ymm27,%ymm4,%ymm4
-	vpclmulqdq	$0x11,%ymm28,%ymm5,%ymm5
-	vpclmulqdq	$0x11,%ymm29,%ymm6,%ymm6
-
-	vaesenc	%ymm21,%ymm0,%ymm0
-	vaesenc	%ymm21,%ymm1,%ymm1
-	vaesenc	%ymm21,%ymm2,%ymm2
-	vaesenc	%ymm21,%ymm3,%ymm3
-
-	vpternlogd	$0x96,%ymm26,%ymm10,%ymm24
-	vpclmulqdq	$0x11,%ymm30,%ymm7,%ymm7
-	vpternlogd	$0x96,%ymm6,%ymm5,%ymm4
-	vpclmulqdq	$0x01,%ymm24,%ymm31,%ymm25
-
-	vaesenc	%ymm22,%ymm0,%ymm0
-	vaesenc	%ymm22,%ymm1,%ymm1
-	vaesenc	%ymm22,%ymm2,%ymm2
-	vaesenc	%ymm22,%ymm3,%ymm3
-
-	vpxord	%ymm7,%ymm4,%ymm10
-	vpshufd	$0x4e,%ymm24,%ymm24
-	vpternlogd	$0x96,%ymm25,%ymm24,%ymm10
-
-	vaesenc	%ymm23,%ymm0,%ymm0
-	vaesenc	%ymm23,%ymm1,%ymm1
-	vaesenc	%ymm23,%ymm2,%ymm2
-	vaesenc	%ymm23,%ymm3,%ymm3
-
-	vextracti32x4	$1,%ymm10,%xmm4
-	vpxord	%xmm4,%xmm10,%xmm10
-
-
-
-
-	vpxord	0(%rdi),%ymm14,%ymm4
-	vpxord	32(%rdi),%ymm14,%ymm5
-	vpxord	64(%rdi),%ymm14,%ymm6
-	vpxord	96(%rdi),%ymm14,%ymm7
-
-
-
-	vaesenclast	%ymm4,%ymm0,%ymm4
-	vaesenclast	%ymm5,%ymm1,%ymm5
-	vaesenclast	%ymm6,%ymm2,%ymm6
-	vaesenclast	%ymm7,%ymm3,%ymm7
-
-
-	vmovdqu8	%ymm4,0(%rsi)
-	vmovdqu8	%ymm5,32(%rsi)
-	vmovdqu8	%ymm6,64(%rsi)
-	vmovdqu8	%ymm7,96(%rsi)
-
-	subq	$-128,%rdi
-	subq	$-128,%rsi
-	addq	$-128,%rdx
-	cmpq	$128-1,%rdx
-	ja	L$crypt_loop_4x__func1
-L$ghash_last_ciphertext_4x__func1:
-	vpshufb	%ymm8,%ymm4,%ymm4
-	vpxord	%ymm10,%ymm4,%ymm4
-	vpshufb	%ymm8,%ymm5,%ymm5
-	vpshufb	%ymm8,%ymm6,%ymm6
-	vpshufb	%ymm8,%ymm7,%ymm7
-	vpclmulqdq	$0x00,%ymm27,%ymm4,%ymm10
-	vpclmulqdq	$0x00,%ymm28,%ymm5,%ymm24
-	vpclmulqdq	$0x00,%ymm29,%ymm6,%ymm25
-	vpxord	%ymm24,%ymm10,%ymm10
-	vpclmulqdq	$0x00,%ymm30,%ymm7,%ymm26
-	vpternlogd	$0x96,%ymm26,%ymm25,%ymm10
-	vpclmulqdq	$0x01,%ymm27,%ymm4,%ymm24
-	vpclmulqdq	$0x01,%ymm28,%ymm5,%ymm25
-	vpclmulqdq	$0x01,%ymm29,%ymm6,%ymm26
-	vpternlogd	$0x96,%ymm26,%ymm25,%ymm24
-	vpclmulqdq	$0x01,%ymm30,%ymm7,%ymm25
-	vpclmulqdq	$0x10,%ymm27,%ymm4,%ymm26
-	vpternlogd	$0x96,%ymm26,%ymm25,%ymm24
-	vpclmulqdq	$0x10,%ymm28,%ymm5,%ymm25
-	vpclmulqdq	$0x10,%ymm29,%ymm6,%ymm26
-	vpternlogd	$0x96,%ymm26,%ymm25,%ymm24
-	vpclmulqdq	$0x01,%ymm10,%ymm31,%ymm26
-	vpclmulqdq	$0x10,%ymm30,%ymm7,%ymm25
-	vpxord	%ymm25,%ymm24,%ymm24
-	vpshufd	$0x4e,%ymm10,%ymm10
-	vpclmulqdq	$0x11,%ymm27,%ymm4,%ymm4
-	vpclmulqdq	$0x11,%ymm28,%ymm5,%ymm5
-	vpclmulqdq	$0x11,%ymm29,%ymm6,%ymm6
-	vpternlogd	$0x96,%ymm26,%ymm10,%ymm24
-	vpclmulqdq	$0x11,%ymm30,%ymm7,%ymm7
-	vpternlogd	$0x96,%ymm6,%ymm5,%ymm4
-	vpclmulqdq	$0x01,%ymm24,%ymm31,%ymm25
-	vpxord	%ymm7,%ymm4,%ymm10
-	vpshufd	$0x4e,%ymm24,%ymm24
-	vpternlogd	$0x96,%ymm25,%ymm24,%ymm10
-	vextracti32x4	$1,%ymm10,%xmm4
-	vpxord	%xmm4,%xmm10,%xmm10
-
-L$crypt_loop_4x_done__func1:
-
-	testq	%rdx,%rdx
-	jz	L$done__func1
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-	movq	%rdx,%rax
-	negq	%rax
-	andq	$-16,%rax
-	leaq	256(%r9,%rax,1),%r8
-	vpxor	%xmm4,%xmm4,%xmm4
-	vpxor	%xmm5,%xmm5,%xmm5
-	vpxor	%xmm6,%xmm6,%xmm6
-
-	cmpq	$32,%rdx
-	jb	L$partial_vec__func1
-
-L$crypt_loop_1x__func1:
-
-
-
-	vpshufb	%ymm8,%ymm12,%ymm0
-	vpaddd	%ymm11,%ymm12,%ymm12
-	vpxord	%ymm13,%ymm0,%ymm0
-	leaq	16(%rcx),%rax
-L$vaesenc_loop_tail_full_vec__func1:
-	vbroadcasti32x4	(%rax),%ymm9
-	vaesenc	%ymm9,%ymm0,%ymm0
-	addq	$16,%rax
-	cmpq	%rax,%r11
-	jne	L$vaesenc_loop_tail_full_vec__func1
-	vaesenclast	%ymm14,%ymm0,%ymm0
-
-
-	vmovdqu8	(%rdi),%ymm1
-	vpxord	%ymm1,%ymm0,%ymm0
-	vmovdqu8	%ymm0,(%rsi)
-
-
-	vmovdqu8	(%r8),%ymm30
-	vpshufb	%ymm8,%ymm0,%ymm0
-	vpxord	%ymm10,%ymm0,%ymm0
-	vpclmulqdq	$0x00,%ymm30,%ymm0,%ymm7
-	vpclmulqdq	$0x01,%ymm30,%ymm0,%ymm1
-	vpclmulqdq	$0x10,%ymm30,%ymm0,%ymm2
-	vpclmulqdq	$0x11,%ymm30,%ymm0,%ymm3
-	vpxord	%ymm7,%ymm4,%ymm4
-	vpternlogd	$0x96,%ymm2,%ymm1,%ymm5
-	vpxord	%ymm3,%ymm6,%ymm6
-
-	vpxor	%xmm10,%xmm10,%xmm10
-
-	addq	$32,%r8
-	addq	$32,%rdi
-	addq	$32,%rsi
-	subq	$32,%rdx
-	cmpq	$32,%rdx
-	jae	L$crypt_loop_1x__func1
-
-	testq	%rdx,%rdx
-	jz	L$reduce__func1
-
-L$partial_vec__func1:
-
-
-
-
-	movq	$-1,%rax
-	bzhiq	%rdx,%rax,%rax
-	kmovd	%eax,%k1
-	addq	$15,%rdx
-	andq	$-16,%rdx
-	movq	$-1,%rax
-	bzhiq	%rdx,%rax,%rax
-	kmovd	%eax,%k2
-
-
-
-	vpshufb	%ymm8,%ymm12,%ymm0
-	vpxord	%ymm13,%ymm0,%ymm0
-	leaq	16(%rcx),%rax
-L$vaesenc_loop_tail_partialvec__func1:
-	vbroadcasti32x4	(%rax),%ymm9
-	vaesenc	%ymm9,%ymm0,%ymm0
-	addq	$16,%rax
-	cmpq	%rax,%r11
-	jne	L$vaesenc_loop_tail_partialvec__func1
-	vaesenclast	%ymm14,%ymm0,%ymm0
-
-
-	vmovdqu8	(%rdi),%ymm1{%k1}{z}
-	vpxord	%ymm1,%ymm0,%ymm0
-	vmovdqu8	%ymm0,(%rsi){%k1}
-
-
-
-
-
-
-
-
-
-
-
-
-
-	vmovdqu8	(%r8),%ymm30{%k2}{z}
-	vmovdqu8	%ymm0,%ymm1{%k1}{z}
-	vpshufb	%ymm8,%ymm1,%ymm0
-	vpxord	%ymm10,%ymm0,%ymm0
-	vpclmulqdq	$0x00,%ymm30,%ymm0,%ymm7
-	vpclmulqdq	$0x01,%ymm30,%ymm0,%ymm1
-	vpclmulqdq	$0x10,%ymm30,%ymm0,%ymm2
-	vpclmulqdq	$0x11,%ymm30,%ymm0,%ymm3
-	vpxord	%ymm7,%ymm4,%ymm4
-	vpternlogd	$0x96,%ymm2,%ymm1,%ymm5
-	vpxord	%ymm3,%ymm6,%ymm6
-
-
-L$reduce__func1:
-
-	vpclmulqdq	$0x01,%ymm4,%ymm31,%ymm0
-	vpshufd	$0x4e,%ymm4,%ymm4
-	vpternlogd	$0x96,%ymm0,%ymm4,%ymm5
-	vpclmulqdq	$0x01,%ymm5,%ymm31,%ymm0
-	vpshufd	$0x4e,%ymm5,%ymm5
-	vpternlogd	$0x96,%ymm0,%ymm5,%ymm6
-
-	vextracti32x4	$1,%ymm6,%xmm0
-	vpxord	%xmm0,%xmm6,%xmm10
-
-
-L$done__func1:
-
-	vpshufb	%xmm8,%xmm10,%xmm10
-	vmovdqu	%xmm10,(%r12)
-
-	vzeroupper
-	popq	%r12
-
-	ret
-
-
-
-.globl	_aes_gcm_dec_update_vaes_avx10_256
-.private_extern _aes_gcm_dec_update_vaes_avx10_256
-
-.p2align	5
-_aes_gcm_dec_update_vaes_avx10_256:
-
-
-_CET_ENDBR
-	pushq	%r12
-
-
-	movq	16(%rsp),%r12
-
-	vbroadcasti32x4	L$bswap_mask(%rip),%ymm8
-	vbroadcasti32x4	L$gfpoly(%rip),%ymm31
-
-
-
-	vmovdqu	(%r12),%xmm10
-	vpshufb	%xmm8,%xmm10,%xmm10
-	vbroadcasti32x4	(%r8),%ymm12
-	vpshufb	%ymm8,%ymm12,%ymm12
-
-
-
-	movl	240(%rcx),%r10d
-	leal	-20(,%r10,4),%r10d
-
-
-
-
-	leaq	96(%rcx,%r10,4),%r11
-	vbroadcasti32x4	(%rcx),%ymm13
-	vbroadcasti32x4	(%r11),%ymm14
-
-
-	vpaddd	L$ctr_pattern(%rip),%ymm12,%ymm12
-
-
-	vbroadcasti32x4	L$inc_2blocks(%rip),%ymm11
-
-
-
-	cmpq	$128-1,%rdx
-	jbe	L$crypt_loop_4x_done__func2
-
-
-	vmovdqu8	256-128(%r9),%ymm27
-	vmovdqu8	256-96(%r9),%ymm28
-	vmovdqu8	256-64(%r9),%ymm29
-	vmovdqu8	256-32(%r9),%ymm30
-	vbroadcasti32x4	-144(%r11),%ymm15
-	vbroadcasti32x4	-128(%r11),%ymm16
-	vbroadcasti32x4	-112(%r11),%ymm17
-	vbroadcasti32x4	-96(%r11),%ymm18
-	vbroadcasti32x4	-80(%r11),%ymm19
-	vbroadcasti32x4	-64(%r11),%ymm20
-	vbroadcasti32x4	-48(%r11),%ymm21
-	vbroadcasti32x4	-32(%r11),%ymm22
-	vbroadcasti32x4	-16(%r11),%ymm23
-L$crypt_loop_4x__func2:
-	vmovdqu8	0(%rdi),%ymm4
-	vmovdqu8	32(%rdi),%ymm5
-	vmovdqu8	64(%rdi),%ymm6
-	vmovdqu8	96(%rdi),%ymm7
-
-
-
-	vpshufb	%ymm8,%ymm12,%ymm0
-	vpaddd	%ymm11,%ymm12,%ymm12
-	vpshufb	%ymm8,%ymm12,%ymm1
-	vpaddd	%ymm11,%ymm12,%ymm12
-	vpshufb	%ymm8,%ymm12,%ymm2
-	vpaddd	%ymm11,%ymm12,%ymm12
-	vpshufb	%ymm8,%ymm12,%ymm3
-	vpaddd	%ymm11,%ymm12,%ymm12
-
-
-	vpxord	%ymm13,%ymm0,%ymm0
-	vpxord	%ymm13,%ymm1,%ymm1
-	vpxord	%ymm13,%ymm2,%ymm2
-	vpxord	%ymm13,%ymm3,%ymm3
-
-	cmpl	$24,%r10d
-	jl	L$aes128__func2
-	je	L$aes192__func2
-
-	vbroadcasti32x4	-208(%r11),%ymm9
-	vaesenc	%ymm9,%ymm0,%ymm0
-	vaesenc	%ymm9,%ymm1,%ymm1
-	vaesenc	%ymm9,%ymm2,%ymm2
-	vaesenc	%ymm9,%ymm3,%ymm3
-
-	vbroadcasti32x4	-192(%r11),%ymm9
-	vaesenc	%ymm9,%ymm0,%ymm0
-	vaesenc	%ymm9,%ymm1,%ymm1
-	vaesenc	%ymm9,%ymm2,%ymm2
-	vaesenc	%ymm9,%ymm3,%ymm3
-
-L$aes192__func2:
-	vbroadcasti32x4	-176(%r11),%ymm9
-	vaesenc	%ymm9,%ymm0,%ymm0
-	vaesenc	%ymm9,%ymm1,%ymm1
-	vaesenc	%ymm9,%ymm2,%ymm2
-	vaesenc	%ymm9,%ymm3,%ymm3
-
-	vbroadcasti32x4	-160(%r11),%ymm9
-	vaesenc	%ymm9,%ymm0,%ymm0
-	vaesenc	%ymm9,%ymm1,%ymm1
-	vaesenc	%ymm9,%ymm2,%ymm2
-	vaesenc	%ymm9,%ymm3,%ymm3
-
-L$aes128__func2:
-	vpshufb	%ymm8,%ymm4,%ymm4
-	vpxord	%ymm10,%ymm4,%ymm4
-	vpshufb	%ymm8,%ymm5,%ymm5
-	vpshufb	%ymm8,%ymm6,%ymm6
-
-	vaesenc	%ymm15,%ymm0,%ymm0
-	vaesenc	%ymm15,%ymm1,%ymm1
-	vaesenc	%ymm15,%ymm2,%ymm2
-	vaesenc	%ymm15,%ymm3,%ymm3
-
-	vpshufb	%ymm8,%ymm7,%ymm7
-	vpclmulqdq	$0x00,%ymm27,%ymm4,%ymm10
-	vpclmulqdq	$0x00,%ymm28,%ymm5,%ymm24
-	vpclmulqdq	$0x00,%ymm29,%ymm6,%ymm25
-
-	vaesenc	%ymm16,%ymm0,%ymm0
-	vaesenc	%ymm16,%ymm1,%ymm1
-	vaesenc	%ymm16,%ymm2,%ymm2
-	vaesenc	%ymm16,%ymm3,%ymm3
-
-	vpxord	%ymm24,%ymm10,%ymm10
-	vpclmulqdq	$0x00,%ymm30,%ymm7,%ymm26
-	vpternlogd	$0x96,%ymm26,%ymm25,%ymm10
-	vpclmulqdq	$0x01,%ymm27,%ymm4,%ymm24
-
-	vaesenc	%ymm17,%ymm0,%ymm0
-	vaesenc	%ymm17,%ymm1,%ymm1
-	vaesenc	%ymm17,%ymm2,%ymm2
-	vaesenc	%ymm17,%ymm3,%ymm3
-
-	vpclmulqdq	$0x01,%ymm28,%ymm5,%ymm25
-	vpclmulqdq	$0x01,%ymm29,%ymm6,%ymm26
-	vpternlogd	$0x96,%ymm26,%ymm25,%ymm24
-	vpclmulqdq	$0x01,%ymm30,%ymm7,%ymm25
-
-	vaesenc	%ymm18,%ymm0,%ymm0
-	vaesenc	%ymm18,%ymm1,%ymm1
-	vaesenc	%ymm18,%ymm2,%ymm2
-	vaesenc	%ymm18,%ymm3,%ymm3
-
-	vpclmulqdq	$0x10,%ymm27,%ymm4,%ymm26
-	vpternlogd	$0x96,%ymm26,%ymm25,%ymm24
-	vpclmulqdq	$0x10,%ymm28,%ymm5,%ymm25
-	vpclmulqdq	$0x10,%ymm29,%ymm6,%ymm26
-
-	vaesenc	%ymm19,%ymm0,%ymm0
-	vaesenc	%ymm19,%ymm1,%ymm1
-	vaesenc	%ymm19,%ymm2,%ymm2
-	vaesenc	%ymm19,%ymm3,%ymm3
-
-	vpternlogd	$0x96,%ymm26,%ymm25,%ymm24
-	vpclmulqdq	$0x01,%ymm10,%ymm31,%ymm26
-	vpclmulqdq	$0x10,%ymm30,%ymm7,%ymm25
-	vpxord	%ymm25,%ymm24,%ymm24
-
-	vaesenc	%ymm20,%ymm0,%ymm0
-	vaesenc	%ymm20,%ymm1,%ymm1
-	vaesenc	%ymm20,%ymm2,%ymm2
-	vaesenc	%ymm20,%ymm3,%ymm3
-
-	vpshufd	$0x4e,%ymm10,%ymm10
-	vpclmulqdq	$0x11,%ymm27,%ymm4,%ymm4
-	vpclmulqdq	$0x11,%ymm28,%ymm5,%ymm5
-	vpclmulqdq	$0x11,%ymm29,%ymm6,%ymm6
-
-	vaesenc	%ymm21,%ymm0,%ymm0
-	vaesenc	%ymm21,%ymm1,%ymm1
-	vaesenc	%ymm21,%ymm2,%ymm2
-	vaesenc	%ymm21,%ymm3,%ymm3
-
-	vpternlogd	$0x96,%ymm26,%ymm10,%ymm24
-	vpclmulqdq	$0x11,%ymm30,%ymm7,%ymm7
-	vpternlogd	$0x96,%ymm6,%ymm5,%ymm4
-	vpclmulqdq	$0x01,%ymm24,%ymm31,%ymm25
-
-	vaesenc	%ymm22,%ymm0,%ymm0
-	vaesenc	%ymm22,%ymm1,%ymm1
-	vaesenc	%ymm22,%ymm2,%ymm2
-	vaesenc	%ymm22,%ymm3,%ymm3
-
-	vpxord	%ymm7,%ymm4,%ymm10
-	vpshufd	$0x4e,%ymm24,%ymm24
-	vpternlogd	$0x96,%ymm25,%ymm24,%ymm10
-
-	vaesenc	%ymm23,%ymm0,%ymm0
-	vaesenc	%ymm23,%ymm1,%ymm1
-	vaesenc	%ymm23,%ymm2,%ymm2
-	vaesenc	%ymm23,%ymm3,%ymm3
-
-	vextracti32x4	$1,%ymm10,%xmm4
-	vpxord	%xmm4,%xmm10,%xmm10
-
-
-
-
-	vpxord	0(%rdi),%ymm14,%ymm4
-	vpxord	32(%rdi),%ymm14,%ymm5
-	vpxord	64(%rdi),%ymm14,%ymm6
-	vpxord	96(%rdi),%ymm14,%ymm7
-
-
-
-	vaesenclast	%ymm4,%ymm0,%ymm4
-	vaesenclast	%ymm5,%ymm1,%ymm5
-	vaesenclast	%ymm6,%ymm2,%ymm6
-	vaesenclast	%ymm7,%ymm3,%ymm7
-
-
-	vmovdqu8	%ymm4,0(%rsi)
-	vmovdqu8	%ymm5,32(%rsi)
-	vmovdqu8	%ymm6,64(%rsi)
-	vmovdqu8	%ymm7,96(%rsi)
-
-	subq	$-128,%rdi
-	subq	$-128,%rsi
-	addq	$-128,%rdx
-	cmpq	$128-1,%rdx
-	ja	L$crypt_loop_4x__func2
-L$crypt_loop_4x_done__func2:
-
-	testq	%rdx,%rdx
-	jz	L$done__func2
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-	movq	%rdx,%rax
-	negq	%rax
-	andq	$-16,%rax
-	leaq	256(%r9,%rax,1),%r8
-	vpxor	%xmm4,%xmm4,%xmm4
-	vpxor	%xmm5,%xmm5,%xmm5
-	vpxor	%xmm6,%xmm6,%xmm6
-
-	cmpq	$32,%rdx
-	jb	L$partial_vec__func2
-
-L$crypt_loop_1x__func2:
-
-
-
-	vpshufb	%ymm8,%ymm12,%ymm0
-	vpaddd	%ymm11,%ymm12,%ymm12
-	vpxord	%ymm13,%ymm0,%ymm0
-	leaq	16(%rcx),%rax
-L$vaesenc_loop_tail_full_vec__func2:
-	vbroadcasti32x4	(%rax),%ymm9
-	vaesenc	%ymm9,%ymm0,%ymm0
-	addq	$16,%rax
-	cmpq	%rax,%r11
-	jne	L$vaesenc_loop_tail_full_vec__func2
-	vaesenclast	%ymm14,%ymm0,%ymm0
-
-
-	vmovdqu8	(%rdi),%ymm1
-	vpxord	%ymm1,%ymm0,%ymm0
-	vmovdqu8	%ymm0,(%rsi)
-
-
-	vmovdqu8	(%r8),%ymm30
-	vpshufb	%ymm8,%ymm1,%ymm0
-	vpxord	%ymm10,%ymm0,%ymm0
-	vpclmulqdq	$0x00,%ymm30,%ymm0,%ymm7
-	vpclmulqdq	$0x01,%ymm30,%ymm0,%ymm1
-	vpclmulqdq	$0x10,%ymm30,%ymm0,%ymm2
-	vpclmulqdq	$0x11,%ymm30,%ymm0,%ymm3
-	vpxord	%ymm7,%ymm4,%ymm4
-	vpternlogd	$0x96,%ymm2,%ymm1,%ymm5
-	vpxord	%ymm3,%ymm6,%ymm6
-
-	vpxor	%xmm10,%xmm10,%xmm10
-
-	addq	$32,%r8
-	addq	$32,%rdi
-	addq	$32,%rsi
-	subq	$32,%rdx
-	cmpq	$32,%rdx
-	jae	L$crypt_loop_1x__func2
-
-	testq	%rdx,%rdx
-	jz	L$reduce__func2
-
-L$partial_vec__func2:
-
-
-
-
-	movq	$-1,%rax
-	bzhiq	%rdx,%rax,%rax
-	kmovd	%eax,%k1
-	addq	$15,%rdx
-	andq	$-16,%rdx
-	movq	$-1,%rax
-	bzhiq	%rdx,%rax,%rax
-	kmovd	%eax,%k2
-
-
-
-	vpshufb	%ymm8,%ymm12,%ymm0
-	vpxord	%ymm13,%ymm0,%ymm0
-	leaq	16(%rcx),%rax
-L$vaesenc_loop_tail_partialvec__func2:
-	vbroadcasti32x4	(%rax),%ymm9
-	vaesenc	%ymm9,%ymm0,%ymm0
-	addq	$16,%rax
-	cmpq	%rax,%r11
-	jne	L$vaesenc_loop_tail_partialvec__func2
-	vaesenclast	%ymm14,%ymm0,%ymm0
-
-
-	vmovdqu8	(%rdi),%ymm1{%k1}{z}
-	vpxord	%ymm1,%ymm0,%ymm0
-	vmovdqu8	%ymm0,(%rsi){%k1}
-
-
-
-
-
-
-
-
-
-
-
-
-
-	vmovdqu8	(%r8),%ymm30{%k2}{z}
-
-	vpshufb	%ymm8,%ymm1,%ymm0
-	vpxord	%ymm10,%ymm0,%ymm0
-	vpclmulqdq	$0x00,%ymm30,%ymm0,%ymm7
-	vpclmulqdq	$0x01,%ymm30,%ymm0,%ymm1
-	vpclmulqdq	$0x10,%ymm30,%ymm0,%ymm2
-	vpclmulqdq	$0x11,%ymm30,%ymm0,%ymm3
-	vpxord	%ymm7,%ymm4,%ymm4
-	vpternlogd	$0x96,%ymm2,%ymm1,%ymm5
-	vpxord	%ymm3,%ymm6,%ymm6
-
-
-L$reduce__func2:
-
-	vpclmulqdq	$0x01,%ymm4,%ymm31,%ymm0
-	vpshufd	$0x4e,%ymm4,%ymm4
-	vpternlogd	$0x96,%ymm0,%ymm4,%ymm5
-	vpclmulqdq	$0x01,%ymm5,%ymm31,%ymm0
-	vpshufd	$0x4e,%ymm5,%ymm5
-	vpternlogd	$0x96,%ymm0,%ymm5,%ymm6
-
-	vextracti32x4	$1,%ymm6,%xmm0
-	vpxord	%xmm0,%xmm6,%xmm10
-
-
-L$done__func2:
-
-	vpshufb	%xmm8,%xmm10,%xmm10
-	vmovdqu	%xmm10,(%r12)
-
-	vzeroupper
-	popq	%r12
-
-	ret
-
-
-
 .globl	_gcm_ghash_vpclmulqdq_avx10_512
 .private_extern _gcm_ghash_vpclmulqdq_avx10_512
 
@@ -1227,7 +205,7 @@
 
 
 	cmpq	$64,%rcx
-	jb	L$aad_blockbyblock__func2
+	jb	L$aad_blockbyblock__func1
 
 
 
@@ -1238,7 +216,7 @@
 	vmovdqu8	256-64(%rsi),%zmm9
 
 	cmpq	$256-1,%rcx
-	jbe	L$aad_loop_1x__func2
+	jbe	L$aad_loop_1x__func1
 
 
 	vmovdqu8	256-256(%rsi),%zmm6
@@ -1246,7 +224,7 @@
 	vmovdqu8	256-128(%rsi),%zmm8
 
 
-L$aad_loop_4x__func2:
+L$aad_loop_4x__func1:
 	vmovdqu8	0(%rdx),%zmm0
 	vmovdqu8	64(%rdx),%zmm1
 	vmovdqu8	128(%rdx),%zmm2
@@ -1295,12 +273,12 @@
 	subq	$-256,%rdx
 	addq	$-256,%rcx
 	cmpq	$256-1,%rcx
-	ja	L$aad_loop_4x__func2
+	ja	L$aad_loop_4x__func1
 
 
 	cmpq	$64,%rcx
-	jb	L$aad_large_done__func2
-L$aad_loop_1x__func2:
+	jb	L$aad_large_done__func1
+L$aad_loop_1x__func1:
 	vmovdqu8	(%rdx),%zmm0
 	vpshufb	%zmm4,%zmm0,%zmm0
 	vpxord	%zmm0,%zmm5,%zmm5
@@ -1325,19 +303,19 @@
 	addq	$64,%rdx
 	subq	$64,%rcx
 	cmpq	$64,%rcx
-	jae	L$aad_loop_1x__func2
+	jae	L$aad_loop_1x__func1
 
-L$aad_large_done__func2:
+L$aad_large_done__func1:
 
 
 	vzeroupper
 
 
-L$aad_blockbyblock__func2:
+L$aad_blockbyblock__func1:
 	testq	%rcx,%rcx
-	jz	L$aad_done__func2
+	jz	L$aad_done__func1
 	vmovdqu	256-16(%rsi),%xmm9
-L$aad_loop_blockbyblock__func2:
+L$aad_loop_blockbyblock__func1:
 	vmovdqu	(%rdx),%xmm0
 	vpshufb	%xmm4,%xmm0,%xmm0
 	vpxor	%xmm0,%xmm5,%xmm5
@@ -1355,9 +333,9 @@
 
 	addq	$16,%rdx
 	subq	$16,%rcx
-	jnz	L$aad_loop_blockbyblock__func2
+	jnz	L$aad_loop_blockbyblock__func1
 
-L$aad_done__func2:
+L$aad_done__func1:
 
 	vpshufb	%xmm4,%xmm5,%xmm5
 	vmovdqu	%xmm5,(%rdi)
@@ -1413,7 +391,7 @@
 
 
 	cmpq	$256-1,%rdx
-	jbe	L$crypt_loop_4x_done__func3
+	jbe	L$crypt_loop_4x_done__func1
 
 
 	vmovdqu8	256-256(%r9),%zmm27
@@ -1440,7 +418,7 @@
 	vpxord	%zmm13,%zmm3,%zmm3
 
 	leaq	16(%rcx),%rax
-L$vaesenc_loop_first_4_vecs__func3:
+L$vaesenc_loop_first_4_vecs__func1:
 	vbroadcasti32x4	(%rax),%zmm9
 	vaesenc	%zmm9,%zmm0,%zmm0
 	vaesenc	%zmm9,%zmm1,%zmm1
@@ -1449,7 +427,7 @@
 
 	addq	$16,%rax
 	cmpq	%rax,%r11
-	jne	L$vaesenc_loop_first_4_vecs__func3
+	jne	L$vaesenc_loop_first_4_vecs__func1
 
 
 
@@ -1475,7 +453,7 @@
 	subq	$-256,%rsi
 	addq	$-256,%rdx
 	cmpq	$256-1,%rdx
-	jbe	L$ghash_last_ciphertext_4x__func3
+	jbe	L$ghash_last_ciphertext_4x__func1
 	vbroadcasti32x4	-144(%r11),%zmm15
 	vbroadcasti32x4	-128(%r11),%zmm16
 	vbroadcasti32x4	-112(%r11),%zmm17
@@ -1485,7 +463,7 @@
 	vbroadcasti32x4	-48(%r11),%zmm21
 	vbroadcasti32x4	-32(%r11),%zmm22
 	vbroadcasti32x4	-16(%r11),%zmm23
-L$crypt_loop_4x__func3:
+L$crypt_loop_4x__func1:
 
 
 
@@ -1505,8 +483,8 @@
 	vpxord	%zmm13,%zmm3,%zmm3
 
 	cmpl	$24,%r10d
-	jl	L$aes128__func3
-	je	L$aes192__func3
+	jl	L$aes128__func1
+	je	L$aes192__func1
 
 	vbroadcasti32x4	-208(%r11),%zmm9
 	vaesenc	%zmm9,%zmm0,%zmm0
@@ -1520,7 +498,7 @@
 	vaesenc	%zmm9,%zmm2,%zmm2
 	vaesenc	%zmm9,%zmm3,%zmm3
 
-L$aes192__func3:
+L$aes192__func1:
 	vbroadcasti32x4	-176(%r11),%zmm9
 	vaesenc	%zmm9,%zmm0,%zmm0
 	vaesenc	%zmm9,%zmm1,%zmm1
@@ -1533,7 +511,7 @@
 	vaesenc	%zmm9,%zmm2,%zmm2
 	vaesenc	%zmm9,%zmm3,%zmm3
 
-L$aes128__func3:
+L$aes128__func1:
 	vpshufb	%zmm8,%zmm4,%zmm4
 	vpxord	%zmm10,%zmm4,%zmm4
 	vpshufb	%zmm8,%zmm5,%zmm5
@@ -1654,8 +632,8 @@
 	subq	$-256,%rsi
 	addq	$-256,%rdx
 	cmpq	$256-1,%rdx
-	ja	L$crypt_loop_4x__func3
-L$ghash_last_ciphertext_4x__func3:
+	ja	L$crypt_loop_4x__func1
+L$ghash_last_ciphertext_4x__func1:
 	vpshufb	%zmm8,%zmm4,%zmm4
 	vpxord	%zmm10,%zmm4,%zmm4
 	vpshufb	%zmm8,%zmm5,%zmm5
@@ -1697,10 +675,10 @@
 	vpxord	%xmm4,%xmm10,%xmm10
 	vpternlogd	$0x96,%xmm5,%xmm6,%xmm10
 
-L$crypt_loop_4x_done__func3:
+L$crypt_loop_4x_done__func1:
 
 	testq	%rdx,%rdx
-	jz	L$done__func3
+	jz	L$done__func1
 
 
 
@@ -1730,9 +708,9 @@
 	vpxor	%xmm6,%xmm6,%xmm6
 
 	cmpq	$64,%rdx
-	jb	L$partial_vec__func3
+	jb	L$partial_vec__func1
 
-L$crypt_loop_1x__func3:
+L$crypt_loop_1x__func1:
 
 
 
@@ -1740,12 +718,12 @@
 	vpaddd	%zmm11,%zmm12,%zmm12
 	vpxord	%zmm13,%zmm0,%zmm0
 	leaq	16(%rcx),%rax
-L$vaesenc_loop_tail_full_vec__func3:
+L$vaesenc_loop_tail_full_vec__func1:
 	vbroadcasti32x4	(%rax),%zmm9
 	vaesenc	%zmm9,%zmm0,%zmm0
 	addq	$16,%rax
 	cmpq	%rax,%r11
-	jne	L$vaesenc_loop_tail_full_vec__func3
+	jne	L$vaesenc_loop_tail_full_vec__func1
 	vaesenclast	%zmm14,%zmm0,%zmm0
 
 
@@ -1772,12 +750,12 @@
 	addq	$64,%rsi
 	subq	$64,%rdx
 	cmpq	$64,%rdx
-	jae	L$crypt_loop_1x__func3
+	jae	L$crypt_loop_1x__func1
 
 	testq	%rdx,%rdx
-	jz	L$reduce__func3
+	jz	L$reduce__func1
 
-L$partial_vec__func3:
+L$partial_vec__func1:
 
 
 
@@ -1796,12 +774,12 @@
 	vpshufb	%zmm8,%zmm12,%zmm0
 	vpxord	%zmm13,%zmm0,%zmm0
 	leaq	16(%rcx),%rax
-L$vaesenc_loop_tail_partialvec__func3:
+L$vaesenc_loop_tail_partialvec__func1:
 	vbroadcasti32x4	(%rax),%zmm9
 	vaesenc	%zmm9,%zmm0,%zmm0
 	addq	$16,%rax
 	cmpq	%rax,%r11
-	jne	L$vaesenc_loop_tail_partialvec__func3
+	jne	L$vaesenc_loop_tail_partialvec__func1
 	vaesenclast	%zmm14,%zmm0,%zmm0
 
 
@@ -1834,7 +812,7 @@
 	vpxord	%zmm3,%zmm6,%zmm6
 
 
-L$reduce__func3:
+L$reduce__func1:
 
 	vpclmulqdq	$0x01,%zmm4,%zmm31,%zmm0
 	vpshufd	$0x4e,%zmm4,%zmm4
@@ -1850,7 +828,7 @@
 	vpternlogd	$0x96,%xmm1,%xmm2,%xmm10
 
 
-L$done__func3:
+L$done__func1:
 
 	vpshufb	%xmm8,%xmm10,%xmm10
 	vmovdqu	%xmm10,(%r12)
@@ -1906,7 +884,7 @@
 
 
 	cmpq	$256-1,%rdx
-	jbe	L$crypt_loop_4x_done__func4
+	jbe	L$crypt_loop_4x_done__func2
 
 
 	vmovdqu8	256-256(%r9),%zmm27
@@ -1922,7 +900,7 @@
 	vbroadcasti32x4	-48(%r11),%zmm21
 	vbroadcasti32x4	-32(%r11),%zmm22
 	vbroadcasti32x4	-16(%r11),%zmm23
-L$crypt_loop_4x__func4:
+L$crypt_loop_4x__func2:
 	vmovdqu8	0(%rdi),%zmm4
 	vmovdqu8	64(%rdi),%zmm5
 	vmovdqu8	128(%rdi),%zmm6
@@ -1946,8 +924,8 @@
 	vpxord	%zmm13,%zmm3,%zmm3
 
 	cmpl	$24,%r10d
-	jl	L$aes128__func4
-	je	L$aes192__func4
+	jl	L$aes128__func2
+	je	L$aes192__func2
 
 	vbroadcasti32x4	-208(%r11),%zmm9
 	vaesenc	%zmm9,%zmm0,%zmm0
@@ -1961,7 +939,7 @@
 	vaesenc	%zmm9,%zmm2,%zmm2
 	vaesenc	%zmm9,%zmm3,%zmm3
 
-L$aes192__func4:
+L$aes192__func2:
 	vbroadcasti32x4	-176(%r11),%zmm9
 	vaesenc	%zmm9,%zmm0,%zmm0
 	vaesenc	%zmm9,%zmm1,%zmm1
@@ -1974,7 +952,7 @@
 	vaesenc	%zmm9,%zmm2,%zmm2
 	vaesenc	%zmm9,%zmm3,%zmm3
 
-L$aes128__func4:
+L$aes128__func2:
 	vpshufb	%zmm8,%zmm4,%zmm4
 	vpxord	%zmm10,%zmm4,%zmm4
 	vpshufb	%zmm8,%zmm5,%zmm5
@@ -2095,11 +1073,11 @@
 	subq	$-256,%rsi
 	addq	$-256,%rdx
 	cmpq	$256-1,%rdx
-	ja	L$crypt_loop_4x__func4
-L$crypt_loop_4x_done__func4:
+	ja	L$crypt_loop_4x__func2
+L$crypt_loop_4x_done__func2:
 
 	testq	%rdx,%rdx
-	jz	L$done__func4
+	jz	L$done__func2
 
 
 
@@ -2129,9 +1107,9 @@
 	vpxor	%xmm6,%xmm6,%xmm6
 
 	cmpq	$64,%rdx
-	jb	L$partial_vec__func4
+	jb	L$partial_vec__func2
 
-L$crypt_loop_1x__func4:
+L$crypt_loop_1x__func2:
 
 
 
@@ -2139,12 +1117,12 @@
 	vpaddd	%zmm11,%zmm12,%zmm12
 	vpxord	%zmm13,%zmm0,%zmm0
 	leaq	16(%rcx),%rax
-L$vaesenc_loop_tail_full_vec__func4:
+L$vaesenc_loop_tail_full_vec__func2:
 	vbroadcasti32x4	(%rax),%zmm9
 	vaesenc	%zmm9,%zmm0,%zmm0
 	addq	$16,%rax
 	cmpq	%rax,%r11
-	jne	L$vaesenc_loop_tail_full_vec__func4
+	jne	L$vaesenc_loop_tail_full_vec__func2
 	vaesenclast	%zmm14,%zmm0,%zmm0
 
 
@@ -2171,12 +1149,12 @@
 	addq	$64,%rsi
 	subq	$64,%rdx
 	cmpq	$64,%rdx
-	jae	L$crypt_loop_1x__func4
+	jae	L$crypt_loop_1x__func2
 
 	testq	%rdx,%rdx
-	jz	L$reduce__func4
+	jz	L$reduce__func2
 
-L$partial_vec__func4:
+L$partial_vec__func2:
 
 
 
@@ -2195,12 +1173,12 @@
 	vpshufb	%zmm8,%zmm12,%zmm0
 	vpxord	%zmm13,%zmm0,%zmm0
 	leaq	16(%rcx),%rax
-L$vaesenc_loop_tail_partialvec__func4:
+L$vaesenc_loop_tail_partialvec__func2:
 	vbroadcasti32x4	(%rax),%zmm9
 	vaesenc	%zmm9,%zmm0,%zmm0
 	addq	$16,%rax
 	cmpq	%rax,%r11
-	jne	L$vaesenc_loop_tail_partialvec__func4
+	jne	L$vaesenc_loop_tail_partialvec__func2
 	vaesenclast	%zmm14,%zmm0,%zmm0
 
 
@@ -2233,7 +1211,7 @@
 	vpxord	%zmm3,%zmm6,%zmm6
 
 
-L$reduce__func4:
+L$reduce__func2:
 
 	vpclmulqdq	$0x01,%zmm4,%zmm31,%zmm0
 	vpshufd	$0x4e,%zmm4,%zmm4
@@ -2249,7 +1227,7 @@
 	vpternlogd	$0x96,%xmm1,%xmm2,%xmm10
 
 
-L$done__func4:
+L$done__func2:
 
 	vpshufb	%xmm8,%xmm10,%xmm10
 	vmovdqu	%xmm10,(%r12)
diff --git a/gen/bcm/aes-gcm-avx10-x86_64-linux.S b/gen/bcm/aes-gcm-avx10-x86_64-linux.S
index cf661c8..2be6a8c 100644
--- a/gen/bcm/aes-gcm-avx10-x86_64-linux.S
+++ b/gen/bcm/aes-gcm-avx10-x86_64-linux.S
@@ -75,16 +75,16 @@
 
 .cfi_endproc	
 .size	gcm_gmult_vpclmulqdq_avx10, . - gcm_gmult_vpclmulqdq_avx10
-.globl	gcm_init_vpclmulqdq_avx10
-.hidden gcm_init_vpclmulqdq_avx10
-.type	gcm_init_vpclmulqdq_avx10,@function
+.globl	gcm_init_vpclmulqdq_avx10_512
+.hidden gcm_init_vpclmulqdq_avx10_512
+.type	gcm_init_vpclmulqdq_avx10_512,@function
 .align	32
-gcm_init_vpclmulqdq_avx10:
+gcm_init_vpclmulqdq_avx10_512:
 .cfi_startproc	
 
 _CET_ENDBR
 
-	leaq	256-32(%rdi),%r8
+	leaq	256-64(%rdi),%r8
 
 
 
@@ -112,7 +112,7 @@
 	vpternlogd	$0x78,.Lgfpoly_and_internal_carrybit(%rip),%xmm0,%xmm3
 
 
-	vbroadcasti32x4	.Lgfpoly(%rip),%ymm5
+	vbroadcasti32x4	.Lgfpoly(%rip),%zmm5
 
 
 
@@ -137,16 +137,6 @@
 
 	vinserti128	$1,%xmm3,%ymm4,%ymm3
 	vinserti128	$1,%xmm4,%ymm4,%ymm4
-
-	vmovdqu8	%ymm3,(%r8)
-
-
-
-
-
-	movl	$7,%eax
-.Lprecompute_next__func1:
-	subq	$32,%r8
 	vpclmulqdq	$0x00,%ymm4,%ymm3,%ymm0
 	vpclmulqdq	$0x01,%ymm4,%ymm3,%ymm1
 	vpclmulqdq	$0x10,%ymm4,%ymm3,%ymm2
@@ -154,12 +144,36 @@
 	vpclmulqdq	$0x01,%ymm0,%ymm5,%ymm2
 	vpshufd	$0x4e,%ymm0,%ymm0
 	vpternlogd	$0x96,%ymm2,%ymm0,%ymm1
-	vpclmulqdq	$0x11,%ymm4,%ymm3,%ymm3
+	vpclmulqdq	$0x11,%ymm4,%ymm3,%ymm4
 	vpclmulqdq	$0x01,%ymm1,%ymm5,%ymm0
 	vpshufd	$0x4e,%ymm1,%ymm1
-	vpternlogd	$0x96,%ymm0,%ymm1,%ymm3
+	vpternlogd	$0x96,%ymm0,%ymm1,%ymm4
 
-	vmovdqu8	%ymm3,(%r8)
+	vinserti64x4	$1,%ymm3,%zmm4,%zmm3
+	vshufi64x2	$0,%zmm4,%zmm4,%zmm4
+
+	vmovdqu8	%zmm3,(%r8)
+
+
+
+
+
+	movl	$3,%eax
+.Lprecompute_next__func1:
+	subq	$64,%r8
+	vpclmulqdq	$0x00,%zmm4,%zmm3,%zmm0
+	vpclmulqdq	$0x01,%zmm4,%zmm3,%zmm1
+	vpclmulqdq	$0x10,%zmm4,%zmm3,%zmm2
+	vpxord	%zmm2,%zmm1,%zmm1
+	vpclmulqdq	$0x01,%zmm0,%zmm5,%zmm2
+	vpshufd	$0x4e,%zmm0,%zmm0
+	vpternlogd	$0x96,%zmm2,%zmm0,%zmm1
+	vpclmulqdq	$0x11,%zmm4,%zmm3,%zmm3
+	vpclmulqdq	$0x01,%zmm1,%zmm5,%zmm0
+	vpshufd	$0x4e,%zmm1,%zmm1
+	vpternlogd	$0x96,%zmm0,%zmm1,%zmm3
+
+	vmovdqu8	%zmm3,(%r8)
 	decl	%eax
 	jnz	.Lprecompute_next__func1
 
@@ -167,1048 +181,7 @@
 	ret
 
 .cfi_endproc	
-.size	gcm_init_vpclmulqdq_avx10, . - gcm_init_vpclmulqdq_avx10
-.globl	gcm_ghash_vpclmulqdq_avx10_256
-.hidden gcm_ghash_vpclmulqdq_avx10_256
-.type	gcm_ghash_vpclmulqdq_avx10_256,@function
-.align	32
-gcm_ghash_vpclmulqdq_avx10_256:
-.cfi_startproc	
-
-_CET_ENDBR
-
-
-
-
-
-
-	vmovdqu	.Lbswap_mask(%rip),%xmm4
-	vmovdqu	.Lgfpoly(%rip),%xmm10
-
-
-	vmovdqu	(%rdi),%xmm5
-	vpshufb	%xmm4,%xmm5,%xmm5
-
-
-	cmpq	$32,%rcx
-	jb	.Laad_blockbyblock__func1
-
-
-
-	vshufi64x2	$0,%ymm4,%ymm4,%ymm4
-	vshufi64x2	$0,%ymm10,%ymm10,%ymm10
-
-
-	vmovdqu8	256-32(%rsi),%ymm9
-
-	cmpq	$128-1,%rcx
-	jbe	.Laad_loop_1x__func1
-
-
-	vmovdqu8	256-128(%rsi),%ymm6
-	vmovdqu8	256-96(%rsi),%ymm7
-	vmovdqu8	256-64(%rsi),%ymm8
-
-
-.Laad_loop_4x__func1:
-	vmovdqu8	0(%rdx),%ymm0
-	vmovdqu8	32(%rdx),%ymm1
-	vmovdqu8	64(%rdx),%ymm2
-	vmovdqu8	96(%rdx),%ymm3
-	vpshufb	%ymm4,%ymm0,%ymm0
-	vpxord	%ymm5,%ymm0,%ymm0
-	vpshufb	%ymm4,%ymm1,%ymm1
-	vpshufb	%ymm4,%ymm2,%ymm2
-	vpshufb	%ymm4,%ymm3,%ymm3
-	vpclmulqdq	$0x00,%ymm6,%ymm0,%ymm5
-	vpclmulqdq	$0x00,%ymm7,%ymm1,%ymm11
-	vpclmulqdq	$0x00,%ymm8,%ymm2,%ymm12
-	vpxord	%ymm11,%ymm5,%ymm5
-	vpclmulqdq	$0x00,%ymm9,%ymm3,%ymm13
-	vpternlogd	$0x96,%ymm13,%ymm12,%ymm5
-	vpclmulqdq	$0x01,%ymm6,%ymm0,%ymm11
-	vpclmulqdq	$0x01,%ymm7,%ymm1,%ymm12
-	vpclmulqdq	$0x01,%ymm8,%ymm2,%ymm13
-	vpternlogd	$0x96,%ymm13,%ymm12,%ymm11
-	vpclmulqdq	$0x01,%ymm9,%ymm3,%ymm12
-	vpclmulqdq	$0x10,%ymm6,%ymm0,%ymm13
-	vpternlogd	$0x96,%ymm13,%ymm12,%ymm11
-	vpclmulqdq	$0x10,%ymm7,%ymm1,%ymm12
-	vpclmulqdq	$0x10,%ymm8,%ymm2,%ymm13
-	vpternlogd	$0x96,%ymm13,%ymm12,%ymm11
-	vpclmulqdq	$0x01,%ymm5,%ymm10,%ymm13
-	vpclmulqdq	$0x10,%ymm9,%ymm3,%ymm12
-	vpxord	%ymm12,%ymm11,%ymm11
-	vpshufd	$0x4e,%ymm5,%ymm5
-	vpclmulqdq	$0x11,%ymm6,%ymm0,%ymm0
-	vpclmulqdq	$0x11,%ymm7,%ymm1,%ymm1
-	vpclmulqdq	$0x11,%ymm8,%ymm2,%ymm2
-	vpternlogd	$0x96,%ymm13,%ymm5,%ymm11
-	vpclmulqdq	$0x11,%ymm9,%ymm3,%ymm3
-	vpternlogd	$0x96,%ymm2,%ymm1,%ymm0
-	vpclmulqdq	$0x01,%ymm11,%ymm10,%ymm12
-	vpxord	%ymm3,%ymm0,%ymm5
-	vpshufd	$0x4e,%ymm11,%ymm11
-	vpternlogd	$0x96,%ymm12,%ymm11,%ymm5
-	vextracti32x4	$1,%ymm5,%xmm0
-	vpxord	%xmm0,%xmm5,%xmm5
-
-	subq	$-128,%rdx
-	addq	$-128,%rcx
-	cmpq	$128-1,%rcx
-	ja	.Laad_loop_4x__func1
-
-
-	cmpq	$32,%rcx
-	jb	.Laad_large_done__func1
-.Laad_loop_1x__func1:
-	vmovdqu8	(%rdx),%ymm0
-	vpshufb	%ymm4,%ymm0,%ymm0
-	vpxord	%ymm0,%ymm5,%ymm5
-	vpclmulqdq	$0x00,%ymm9,%ymm5,%ymm0
-	vpclmulqdq	$0x01,%ymm9,%ymm5,%ymm1
-	vpclmulqdq	$0x10,%ymm9,%ymm5,%ymm2
-	vpxord	%ymm2,%ymm1,%ymm1
-	vpclmulqdq	$0x01,%ymm0,%ymm10,%ymm2
-	vpshufd	$0x4e,%ymm0,%ymm0
-	vpternlogd	$0x96,%ymm2,%ymm0,%ymm1
-	vpclmulqdq	$0x11,%ymm9,%ymm5,%ymm5
-	vpclmulqdq	$0x01,%ymm1,%ymm10,%ymm0
-	vpshufd	$0x4e,%ymm1,%ymm1
-	vpternlogd	$0x96,%ymm0,%ymm1,%ymm5
-
-	vextracti32x4	$1,%ymm5,%xmm0
-	vpxord	%xmm0,%xmm5,%xmm5
-
-	addq	$32,%rdx
-	subq	$32,%rcx
-	cmpq	$32,%rcx
-	jae	.Laad_loop_1x__func1
-
-.Laad_large_done__func1:
-
-
-	vzeroupper
-
-
-.Laad_blockbyblock__func1:
-	testq	%rcx,%rcx
-	jz	.Laad_done__func1
-	vmovdqu	256-16(%rsi),%xmm9
-.Laad_loop_blockbyblock__func1:
-	vmovdqu	(%rdx),%xmm0
-	vpshufb	%xmm4,%xmm0,%xmm0
-	vpxor	%xmm0,%xmm5,%xmm5
-	vpclmulqdq	$0x00,%xmm9,%xmm5,%xmm0
-	vpclmulqdq	$0x01,%xmm9,%xmm5,%xmm1
-	vpclmulqdq	$0x10,%xmm9,%xmm5,%xmm2
-	vpxord	%xmm2,%xmm1,%xmm1
-	vpclmulqdq	$0x01,%xmm0,%xmm10,%xmm2
-	vpshufd	$0x4e,%xmm0,%xmm0
-	vpternlogd	$0x96,%xmm2,%xmm0,%xmm1
-	vpclmulqdq	$0x11,%xmm9,%xmm5,%xmm5
-	vpclmulqdq	$0x01,%xmm1,%xmm10,%xmm0
-	vpshufd	$0x4e,%xmm1,%xmm1
-	vpternlogd	$0x96,%xmm0,%xmm1,%xmm5
-
-	addq	$16,%rdx
-	subq	$16,%rcx
-	jnz	.Laad_loop_blockbyblock__func1
-
-.Laad_done__func1:
-
-	vpshufb	%xmm4,%xmm5,%xmm5
-	vmovdqu	%xmm5,(%rdi)
-	ret
-
-.cfi_endproc	
-.size	gcm_ghash_vpclmulqdq_avx10_256, . - gcm_ghash_vpclmulqdq_avx10_256
-.globl	aes_gcm_enc_update_vaes_avx10_256
-.hidden aes_gcm_enc_update_vaes_avx10_256
-.type	aes_gcm_enc_update_vaes_avx10_256,@function
-.align	32
-aes_gcm_enc_update_vaes_avx10_256:
-.cfi_startproc	
-
-_CET_ENDBR
-	pushq	%r12
-.cfi_adjust_cfa_offset	8
-.cfi_offset	%r12,-16
-
-	movq	16(%rsp),%r12
-#ifdef BORINGSSL_DISPATCH_TEST
-.extern	BORINGSSL_function_hit
-.hidden BORINGSSL_function_hit
-	movb	$1,BORINGSSL_function_hit+6(%rip)
-#endif
-
-	vbroadcasti32x4	.Lbswap_mask(%rip),%ymm8
-	vbroadcasti32x4	.Lgfpoly(%rip),%ymm31
-
-
-
-	vmovdqu	(%r12),%xmm10
-	vpshufb	%xmm8,%xmm10,%xmm10
-	vbroadcasti32x4	(%r8),%ymm12
-	vpshufb	%ymm8,%ymm12,%ymm12
-
-
-
-	movl	240(%rcx),%r10d
-	leal	-20(,%r10,4),%r10d
-
-
-
-
-	leaq	96(%rcx,%r10,4),%r11
-	vbroadcasti32x4	(%rcx),%ymm13
-	vbroadcasti32x4	(%r11),%ymm14
-
-
-	vpaddd	.Lctr_pattern(%rip),%ymm12,%ymm12
-
-
-	vbroadcasti32x4	.Linc_2blocks(%rip),%ymm11
-
-
-
-	cmpq	$128-1,%rdx
-	jbe	.Lcrypt_loop_4x_done__func1
-
-
-	vmovdqu8	256-128(%r9),%ymm27
-	vmovdqu8	256-96(%r9),%ymm28
-	vmovdqu8	256-64(%r9),%ymm29
-	vmovdqu8	256-32(%r9),%ymm30
-
-
-
-
-	vpshufb	%ymm8,%ymm12,%ymm0
-	vpaddd	%ymm11,%ymm12,%ymm12
-	vpshufb	%ymm8,%ymm12,%ymm1
-	vpaddd	%ymm11,%ymm12,%ymm12
-	vpshufb	%ymm8,%ymm12,%ymm2
-	vpaddd	%ymm11,%ymm12,%ymm12
-	vpshufb	%ymm8,%ymm12,%ymm3
-	vpaddd	%ymm11,%ymm12,%ymm12
-
-
-	vpxord	%ymm13,%ymm0,%ymm0
-	vpxord	%ymm13,%ymm1,%ymm1
-	vpxord	%ymm13,%ymm2,%ymm2
-	vpxord	%ymm13,%ymm3,%ymm3
-
-	leaq	16(%rcx),%rax
-.Lvaesenc_loop_first_4_vecs__func1:
-	vbroadcasti32x4	(%rax),%ymm9
-	vaesenc	%ymm9,%ymm0,%ymm0
-	vaesenc	%ymm9,%ymm1,%ymm1
-	vaesenc	%ymm9,%ymm2,%ymm2
-	vaesenc	%ymm9,%ymm3,%ymm3
-
-	addq	$16,%rax
-	cmpq	%rax,%r11
-	jne	.Lvaesenc_loop_first_4_vecs__func1
-
-
-
-	vpxord	0(%rdi),%ymm14,%ymm4
-	vpxord	32(%rdi),%ymm14,%ymm5
-	vpxord	64(%rdi),%ymm14,%ymm6
-	vpxord	96(%rdi),%ymm14,%ymm7
-
-
-
-	vaesenclast	%ymm4,%ymm0,%ymm4
-	vaesenclast	%ymm5,%ymm1,%ymm5
-	vaesenclast	%ymm6,%ymm2,%ymm6
-	vaesenclast	%ymm7,%ymm3,%ymm7
-
-
-	vmovdqu8	%ymm4,0(%rsi)
-	vmovdqu8	%ymm5,32(%rsi)
-	vmovdqu8	%ymm6,64(%rsi)
-	vmovdqu8	%ymm7,96(%rsi)
-
-	subq	$-128,%rdi
-	subq	$-128,%rsi
-	addq	$-128,%rdx
-	cmpq	$128-1,%rdx
-	jbe	.Lghash_last_ciphertext_4x__func1
-	vbroadcasti32x4	-144(%r11),%ymm15
-	vbroadcasti32x4	-128(%r11),%ymm16
-	vbroadcasti32x4	-112(%r11),%ymm17
-	vbroadcasti32x4	-96(%r11),%ymm18
-	vbroadcasti32x4	-80(%r11),%ymm19
-	vbroadcasti32x4	-64(%r11),%ymm20
-	vbroadcasti32x4	-48(%r11),%ymm21
-	vbroadcasti32x4	-32(%r11),%ymm22
-	vbroadcasti32x4	-16(%r11),%ymm23
-.Lcrypt_loop_4x__func1:
-
-
-
-	vpshufb	%ymm8,%ymm12,%ymm0
-	vpaddd	%ymm11,%ymm12,%ymm12
-	vpshufb	%ymm8,%ymm12,%ymm1
-	vpaddd	%ymm11,%ymm12,%ymm12
-	vpshufb	%ymm8,%ymm12,%ymm2
-	vpaddd	%ymm11,%ymm12,%ymm12
-	vpshufb	%ymm8,%ymm12,%ymm3
-	vpaddd	%ymm11,%ymm12,%ymm12
-
-
-	vpxord	%ymm13,%ymm0,%ymm0
-	vpxord	%ymm13,%ymm1,%ymm1
-	vpxord	%ymm13,%ymm2,%ymm2
-	vpxord	%ymm13,%ymm3,%ymm3
-
-	cmpl	$24,%r10d
-	jl	.Laes128__func1
-	je	.Laes192__func1
-
-	vbroadcasti32x4	-208(%r11),%ymm9
-	vaesenc	%ymm9,%ymm0,%ymm0
-	vaesenc	%ymm9,%ymm1,%ymm1
-	vaesenc	%ymm9,%ymm2,%ymm2
-	vaesenc	%ymm9,%ymm3,%ymm3
-
-	vbroadcasti32x4	-192(%r11),%ymm9
-	vaesenc	%ymm9,%ymm0,%ymm0
-	vaesenc	%ymm9,%ymm1,%ymm1
-	vaesenc	%ymm9,%ymm2,%ymm2
-	vaesenc	%ymm9,%ymm3,%ymm3
-
-.Laes192__func1:
-	vbroadcasti32x4	-176(%r11),%ymm9
-	vaesenc	%ymm9,%ymm0,%ymm0
-	vaesenc	%ymm9,%ymm1,%ymm1
-	vaesenc	%ymm9,%ymm2,%ymm2
-	vaesenc	%ymm9,%ymm3,%ymm3
-
-	vbroadcasti32x4	-160(%r11),%ymm9
-	vaesenc	%ymm9,%ymm0,%ymm0
-	vaesenc	%ymm9,%ymm1,%ymm1
-	vaesenc	%ymm9,%ymm2,%ymm2
-	vaesenc	%ymm9,%ymm3,%ymm3
-
-.Laes128__func1:
-	vpshufb	%ymm8,%ymm4,%ymm4
-	vpxord	%ymm10,%ymm4,%ymm4
-	vpshufb	%ymm8,%ymm5,%ymm5
-	vpshufb	%ymm8,%ymm6,%ymm6
-
-	vaesenc	%ymm15,%ymm0,%ymm0
-	vaesenc	%ymm15,%ymm1,%ymm1
-	vaesenc	%ymm15,%ymm2,%ymm2
-	vaesenc	%ymm15,%ymm3,%ymm3
-
-	vpshufb	%ymm8,%ymm7,%ymm7
-	vpclmulqdq	$0x00,%ymm27,%ymm4,%ymm10
-	vpclmulqdq	$0x00,%ymm28,%ymm5,%ymm24
-	vpclmulqdq	$0x00,%ymm29,%ymm6,%ymm25
-
-	vaesenc	%ymm16,%ymm0,%ymm0
-	vaesenc	%ymm16,%ymm1,%ymm1
-	vaesenc	%ymm16,%ymm2,%ymm2
-	vaesenc	%ymm16,%ymm3,%ymm3
-
-	vpxord	%ymm24,%ymm10,%ymm10
-	vpclmulqdq	$0x00,%ymm30,%ymm7,%ymm26
-	vpternlogd	$0x96,%ymm26,%ymm25,%ymm10
-	vpclmulqdq	$0x01,%ymm27,%ymm4,%ymm24
-
-	vaesenc	%ymm17,%ymm0,%ymm0
-	vaesenc	%ymm17,%ymm1,%ymm1
-	vaesenc	%ymm17,%ymm2,%ymm2
-	vaesenc	%ymm17,%ymm3,%ymm3
-
-	vpclmulqdq	$0x01,%ymm28,%ymm5,%ymm25
-	vpclmulqdq	$0x01,%ymm29,%ymm6,%ymm26
-	vpternlogd	$0x96,%ymm26,%ymm25,%ymm24
-	vpclmulqdq	$0x01,%ymm30,%ymm7,%ymm25
-
-	vaesenc	%ymm18,%ymm0,%ymm0
-	vaesenc	%ymm18,%ymm1,%ymm1
-	vaesenc	%ymm18,%ymm2,%ymm2
-	vaesenc	%ymm18,%ymm3,%ymm3
-
-	vpclmulqdq	$0x10,%ymm27,%ymm4,%ymm26
-	vpternlogd	$0x96,%ymm26,%ymm25,%ymm24
-	vpclmulqdq	$0x10,%ymm28,%ymm5,%ymm25
-	vpclmulqdq	$0x10,%ymm29,%ymm6,%ymm26
-
-	vaesenc	%ymm19,%ymm0,%ymm0
-	vaesenc	%ymm19,%ymm1,%ymm1
-	vaesenc	%ymm19,%ymm2,%ymm2
-	vaesenc	%ymm19,%ymm3,%ymm3
-
-	vpternlogd	$0x96,%ymm26,%ymm25,%ymm24
-	vpclmulqdq	$0x01,%ymm10,%ymm31,%ymm26
-	vpclmulqdq	$0x10,%ymm30,%ymm7,%ymm25
-	vpxord	%ymm25,%ymm24,%ymm24
-
-	vaesenc	%ymm20,%ymm0,%ymm0
-	vaesenc	%ymm20,%ymm1,%ymm1
-	vaesenc	%ymm20,%ymm2,%ymm2
-	vaesenc	%ymm20,%ymm3,%ymm3
-
-	vpshufd	$0x4e,%ymm10,%ymm10
-	vpclmulqdq	$0x11,%ymm27,%ymm4,%ymm4
-	vpclmulqdq	$0x11,%ymm28,%ymm5,%ymm5
-	vpclmulqdq	$0x11,%ymm29,%ymm6,%ymm6
-
-	vaesenc	%ymm21,%ymm0,%ymm0
-	vaesenc	%ymm21,%ymm1,%ymm1
-	vaesenc	%ymm21,%ymm2,%ymm2
-	vaesenc	%ymm21,%ymm3,%ymm3
-
-	vpternlogd	$0x96,%ymm26,%ymm10,%ymm24
-	vpclmulqdq	$0x11,%ymm30,%ymm7,%ymm7
-	vpternlogd	$0x96,%ymm6,%ymm5,%ymm4
-	vpclmulqdq	$0x01,%ymm24,%ymm31,%ymm25
-
-	vaesenc	%ymm22,%ymm0,%ymm0
-	vaesenc	%ymm22,%ymm1,%ymm1
-	vaesenc	%ymm22,%ymm2,%ymm2
-	vaesenc	%ymm22,%ymm3,%ymm3
-
-	vpxord	%ymm7,%ymm4,%ymm10
-	vpshufd	$0x4e,%ymm24,%ymm24
-	vpternlogd	$0x96,%ymm25,%ymm24,%ymm10
-
-	vaesenc	%ymm23,%ymm0,%ymm0
-	vaesenc	%ymm23,%ymm1,%ymm1
-	vaesenc	%ymm23,%ymm2,%ymm2
-	vaesenc	%ymm23,%ymm3,%ymm3
-
-	vextracti32x4	$1,%ymm10,%xmm4
-	vpxord	%xmm4,%xmm10,%xmm10
-
-
-
-
-	vpxord	0(%rdi),%ymm14,%ymm4
-	vpxord	32(%rdi),%ymm14,%ymm5
-	vpxord	64(%rdi),%ymm14,%ymm6
-	vpxord	96(%rdi),%ymm14,%ymm7
-
-
-
-	vaesenclast	%ymm4,%ymm0,%ymm4
-	vaesenclast	%ymm5,%ymm1,%ymm5
-	vaesenclast	%ymm6,%ymm2,%ymm6
-	vaesenclast	%ymm7,%ymm3,%ymm7
-
-
-	vmovdqu8	%ymm4,0(%rsi)
-	vmovdqu8	%ymm5,32(%rsi)
-	vmovdqu8	%ymm6,64(%rsi)
-	vmovdqu8	%ymm7,96(%rsi)
-
-	subq	$-128,%rdi
-	subq	$-128,%rsi
-	addq	$-128,%rdx
-	cmpq	$128-1,%rdx
-	ja	.Lcrypt_loop_4x__func1
-.Lghash_last_ciphertext_4x__func1:
-	vpshufb	%ymm8,%ymm4,%ymm4
-	vpxord	%ymm10,%ymm4,%ymm4
-	vpshufb	%ymm8,%ymm5,%ymm5
-	vpshufb	%ymm8,%ymm6,%ymm6
-	vpshufb	%ymm8,%ymm7,%ymm7
-	vpclmulqdq	$0x00,%ymm27,%ymm4,%ymm10
-	vpclmulqdq	$0x00,%ymm28,%ymm5,%ymm24
-	vpclmulqdq	$0x00,%ymm29,%ymm6,%ymm25
-	vpxord	%ymm24,%ymm10,%ymm10
-	vpclmulqdq	$0x00,%ymm30,%ymm7,%ymm26
-	vpternlogd	$0x96,%ymm26,%ymm25,%ymm10
-	vpclmulqdq	$0x01,%ymm27,%ymm4,%ymm24
-	vpclmulqdq	$0x01,%ymm28,%ymm5,%ymm25
-	vpclmulqdq	$0x01,%ymm29,%ymm6,%ymm26
-	vpternlogd	$0x96,%ymm26,%ymm25,%ymm24
-	vpclmulqdq	$0x01,%ymm30,%ymm7,%ymm25
-	vpclmulqdq	$0x10,%ymm27,%ymm4,%ymm26
-	vpternlogd	$0x96,%ymm26,%ymm25,%ymm24
-	vpclmulqdq	$0x10,%ymm28,%ymm5,%ymm25
-	vpclmulqdq	$0x10,%ymm29,%ymm6,%ymm26
-	vpternlogd	$0x96,%ymm26,%ymm25,%ymm24
-	vpclmulqdq	$0x01,%ymm10,%ymm31,%ymm26
-	vpclmulqdq	$0x10,%ymm30,%ymm7,%ymm25
-	vpxord	%ymm25,%ymm24,%ymm24
-	vpshufd	$0x4e,%ymm10,%ymm10
-	vpclmulqdq	$0x11,%ymm27,%ymm4,%ymm4
-	vpclmulqdq	$0x11,%ymm28,%ymm5,%ymm5
-	vpclmulqdq	$0x11,%ymm29,%ymm6,%ymm6
-	vpternlogd	$0x96,%ymm26,%ymm10,%ymm24
-	vpclmulqdq	$0x11,%ymm30,%ymm7,%ymm7
-	vpternlogd	$0x96,%ymm6,%ymm5,%ymm4
-	vpclmulqdq	$0x01,%ymm24,%ymm31,%ymm25
-	vpxord	%ymm7,%ymm4,%ymm10
-	vpshufd	$0x4e,%ymm24,%ymm24
-	vpternlogd	$0x96,%ymm25,%ymm24,%ymm10
-	vextracti32x4	$1,%ymm10,%xmm4
-	vpxord	%xmm4,%xmm10,%xmm10
-
-.Lcrypt_loop_4x_done__func1:
-
-	testq	%rdx,%rdx
-	jz	.Ldone__func1
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-	movq	%rdx,%rax
-	negq	%rax
-	andq	$-16,%rax
-	leaq	256(%r9,%rax,1),%r8
-	vpxor	%xmm4,%xmm4,%xmm4
-	vpxor	%xmm5,%xmm5,%xmm5
-	vpxor	%xmm6,%xmm6,%xmm6
-
-	cmpq	$32,%rdx
-	jb	.Lpartial_vec__func1
-
-.Lcrypt_loop_1x__func1:
-
-
-
-	vpshufb	%ymm8,%ymm12,%ymm0
-	vpaddd	%ymm11,%ymm12,%ymm12
-	vpxord	%ymm13,%ymm0,%ymm0
-	leaq	16(%rcx),%rax
-.Lvaesenc_loop_tail_full_vec__func1:
-	vbroadcasti32x4	(%rax),%ymm9
-	vaesenc	%ymm9,%ymm0,%ymm0
-	addq	$16,%rax
-	cmpq	%rax,%r11
-	jne	.Lvaesenc_loop_tail_full_vec__func1
-	vaesenclast	%ymm14,%ymm0,%ymm0
-
-
-	vmovdqu8	(%rdi),%ymm1
-	vpxord	%ymm1,%ymm0,%ymm0
-	vmovdqu8	%ymm0,(%rsi)
-
-
-	vmovdqu8	(%r8),%ymm30
-	vpshufb	%ymm8,%ymm0,%ymm0
-	vpxord	%ymm10,%ymm0,%ymm0
-	vpclmulqdq	$0x00,%ymm30,%ymm0,%ymm7
-	vpclmulqdq	$0x01,%ymm30,%ymm0,%ymm1
-	vpclmulqdq	$0x10,%ymm30,%ymm0,%ymm2
-	vpclmulqdq	$0x11,%ymm30,%ymm0,%ymm3
-	vpxord	%ymm7,%ymm4,%ymm4
-	vpternlogd	$0x96,%ymm2,%ymm1,%ymm5
-	vpxord	%ymm3,%ymm6,%ymm6
-
-	vpxor	%xmm10,%xmm10,%xmm10
-
-	addq	$32,%r8
-	addq	$32,%rdi
-	addq	$32,%rsi
-	subq	$32,%rdx
-	cmpq	$32,%rdx
-	jae	.Lcrypt_loop_1x__func1
-
-	testq	%rdx,%rdx
-	jz	.Lreduce__func1
-
-.Lpartial_vec__func1:
-
-
-
-
-	movq	$-1,%rax
-	bzhiq	%rdx,%rax,%rax
-	kmovd	%eax,%k1
-	addq	$15,%rdx
-	andq	$-16,%rdx
-	movq	$-1,%rax
-	bzhiq	%rdx,%rax,%rax
-	kmovd	%eax,%k2
-
-
-
-	vpshufb	%ymm8,%ymm12,%ymm0
-	vpxord	%ymm13,%ymm0,%ymm0
-	leaq	16(%rcx),%rax
-.Lvaesenc_loop_tail_partialvec__func1:
-	vbroadcasti32x4	(%rax),%ymm9
-	vaesenc	%ymm9,%ymm0,%ymm0
-	addq	$16,%rax
-	cmpq	%rax,%r11
-	jne	.Lvaesenc_loop_tail_partialvec__func1
-	vaesenclast	%ymm14,%ymm0,%ymm0
-
-
-	vmovdqu8	(%rdi),%ymm1{%k1}{z}
-	vpxord	%ymm1,%ymm0,%ymm0
-	vmovdqu8	%ymm0,(%rsi){%k1}
-
-
-
-
-
-
-
-
-
-
-
-
-
-	vmovdqu8	(%r8),%ymm30{%k2}{z}
-	vmovdqu8	%ymm0,%ymm1{%k1}{z}
-	vpshufb	%ymm8,%ymm1,%ymm0
-	vpxord	%ymm10,%ymm0,%ymm0
-	vpclmulqdq	$0x00,%ymm30,%ymm0,%ymm7
-	vpclmulqdq	$0x01,%ymm30,%ymm0,%ymm1
-	vpclmulqdq	$0x10,%ymm30,%ymm0,%ymm2
-	vpclmulqdq	$0x11,%ymm30,%ymm0,%ymm3
-	vpxord	%ymm7,%ymm4,%ymm4
-	vpternlogd	$0x96,%ymm2,%ymm1,%ymm5
-	vpxord	%ymm3,%ymm6,%ymm6
-
-
-.Lreduce__func1:
-
-	vpclmulqdq	$0x01,%ymm4,%ymm31,%ymm0
-	vpshufd	$0x4e,%ymm4,%ymm4
-	vpternlogd	$0x96,%ymm0,%ymm4,%ymm5
-	vpclmulqdq	$0x01,%ymm5,%ymm31,%ymm0
-	vpshufd	$0x4e,%ymm5,%ymm5
-	vpternlogd	$0x96,%ymm0,%ymm5,%ymm6
-
-	vextracti32x4	$1,%ymm6,%xmm0
-	vpxord	%xmm0,%xmm6,%xmm10
-
-
-.Ldone__func1:
-
-	vpshufb	%xmm8,%xmm10,%xmm10
-	vmovdqu	%xmm10,(%r12)
-
-	vzeroupper
-	popq	%r12
-.cfi_adjust_cfa_offset	-8
-.cfi_restore	%r12
-	ret
-
-.cfi_endproc	
-.size	aes_gcm_enc_update_vaes_avx10_256, . - aes_gcm_enc_update_vaes_avx10_256
-.globl	aes_gcm_dec_update_vaes_avx10_256
-.hidden aes_gcm_dec_update_vaes_avx10_256
-.type	aes_gcm_dec_update_vaes_avx10_256,@function
-.align	32
-aes_gcm_dec_update_vaes_avx10_256:
-.cfi_startproc	
-
-_CET_ENDBR
-	pushq	%r12
-.cfi_adjust_cfa_offset	8
-.cfi_offset	%r12,-16
-
-	movq	16(%rsp),%r12
-
-	vbroadcasti32x4	.Lbswap_mask(%rip),%ymm8
-	vbroadcasti32x4	.Lgfpoly(%rip),%ymm31
-
-
-
-	vmovdqu	(%r12),%xmm10
-	vpshufb	%xmm8,%xmm10,%xmm10
-	vbroadcasti32x4	(%r8),%ymm12
-	vpshufb	%ymm8,%ymm12,%ymm12
-
-
-
-	movl	240(%rcx),%r10d
-	leal	-20(,%r10,4),%r10d
-
-
-
-
-	leaq	96(%rcx,%r10,4),%r11
-	vbroadcasti32x4	(%rcx),%ymm13
-	vbroadcasti32x4	(%r11),%ymm14
-
-
-	vpaddd	.Lctr_pattern(%rip),%ymm12,%ymm12
-
-
-	vbroadcasti32x4	.Linc_2blocks(%rip),%ymm11
-
-
-
-	cmpq	$128-1,%rdx
-	jbe	.Lcrypt_loop_4x_done__func2
-
-
-	vmovdqu8	256-128(%r9),%ymm27
-	vmovdqu8	256-96(%r9),%ymm28
-	vmovdqu8	256-64(%r9),%ymm29
-	vmovdqu8	256-32(%r9),%ymm30
-	vbroadcasti32x4	-144(%r11),%ymm15
-	vbroadcasti32x4	-128(%r11),%ymm16
-	vbroadcasti32x4	-112(%r11),%ymm17
-	vbroadcasti32x4	-96(%r11),%ymm18
-	vbroadcasti32x4	-80(%r11),%ymm19
-	vbroadcasti32x4	-64(%r11),%ymm20
-	vbroadcasti32x4	-48(%r11),%ymm21
-	vbroadcasti32x4	-32(%r11),%ymm22
-	vbroadcasti32x4	-16(%r11),%ymm23
-.Lcrypt_loop_4x__func2:
-	vmovdqu8	0(%rdi),%ymm4
-	vmovdqu8	32(%rdi),%ymm5
-	vmovdqu8	64(%rdi),%ymm6
-	vmovdqu8	96(%rdi),%ymm7
-
-
-
-	vpshufb	%ymm8,%ymm12,%ymm0
-	vpaddd	%ymm11,%ymm12,%ymm12
-	vpshufb	%ymm8,%ymm12,%ymm1
-	vpaddd	%ymm11,%ymm12,%ymm12
-	vpshufb	%ymm8,%ymm12,%ymm2
-	vpaddd	%ymm11,%ymm12,%ymm12
-	vpshufb	%ymm8,%ymm12,%ymm3
-	vpaddd	%ymm11,%ymm12,%ymm12
-
-
-	vpxord	%ymm13,%ymm0,%ymm0
-	vpxord	%ymm13,%ymm1,%ymm1
-	vpxord	%ymm13,%ymm2,%ymm2
-	vpxord	%ymm13,%ymm3,%ymm3
-
-	cmpl	$24,%r10d
-	jl	.Laes128__func2
-	je	.Laes192__func2
-
-	vbroadcasti32x4	-208(%r11),%ymm9
-	vaesenc	%ymm9,%ymm0,%ymm0
-	vaesenc	%ymm9,%ymm1,%ymm1
-	vaesenc	%ymm9,%ymm2,%ymm2
-	vaesenc	%ymm9,%ymm3,%ymm3
-
-	vbroadcasti32x4	-192(%r11),%ymm9
-	vaesenc	%ymm9,%ymm0,%ymm0
-	vaesenc	%ymm9,%ymm1,%ymm1
-	vaesenc	%ymm9,%ymm2,%ymm2
-	vaesenc	%ymm9,%ymm3,%ymm3
-
-.Laes192__func2:
-	vbroadcasti32x4	-176(%r11),%ymm9
-	vaesenc	%ymm9,%ymm0,%ymm0
-	vaesenc	%ymm9,%ymm1,%ymm1
-	vaesenc	%ymm9,%ymm2,%ymm2
-	vaesenc	%ymm9,%ymm3,%ymm3
-
-	vbroadcasti32x4	-160(%r11),%ymm9
-	vaesenc	%ymm9,%ymm0,%ymm0
-	vaesenc	%ymm9,%ymm1,%ymm1
-	vaesenc	%ymm9,%ymm2,%ymm2
-	vaesenc	%ymm9,%ymm3,%ymm3
-
-.Laes128__func2:
-	vpshufb	%ymm8,%ymm4,%ymm4
-	vpxord	%ymm10,%ymm4,%ymm4
-	vpshufb	%ymm8,%ymm5,%ymm5
-	vpshufb	%ymm8,%ymm6,%ymm6
-
-	vaesenc	%ymm15,%ymm0,%ymm0
-	vaesenc	%ymm15,%ymm1,%ymm1
-	vaesenc	%ymm15,%ymm2,%ymm2
-	vaesenc	%ymm15,%ymm3,%ymm3
-
-	vpshufb	%ymm8,%ymm7,%ymm7
-	vpclmulqdq	$0x00,%ymm27,%ymm4,%ymm10
-	vpclmulqdq	$0x00,%ymm28,%ymm5,%ymm24
-	vpclmulqdq	$0x00,%ymm29,%ymm6,%ymm25
-
-	vaesenc	%ymm16,%ymm0,%ymm0
-	vaesenc	%ymm16,%ymm1,%ymm1
-	vaesenc	%ymm16,%ymm2,%ymm2
-	vaesenc	%ymm16,%ymm3,%ymm3
-
-	vpxord	%ymm24,%ymm10,%ymm10
-	vpclmulqdq	$0x00,%ymm30,%ymm7,%ymm26
-	vpternlogd	$0x96,%ymm26,%ymm25,%ymm10
-	vpclmulqdq	$0x01,%ymm27,%ymm4,%ymm24
-
-	vaesenc	%ymm17,%ymm0,%ymm0
-	vaesenc	%ymm17,%ymm1,%ymm1
-	vaesenc	%ymm17,%ymm2,%ymm2
-	vaesenc	%ymm17,%ymm3,%ymm3
-
-	vpclmulqdq	$0x01,%ymm28,%ymm5,%ymm25
-	vpclmulqdq	$0x01,%ymm29,%ymm6,%ymm26
-	vpternlogd	$0x96,%ymm26,%ymm25,%ymm24
-	vpclmulqdq	$0x01,%ymm30,%ymm7,%ymm25
-
-	vaesenc	%ymm18,%ymm0,%ymm0
-	vaesenc	%ymm18,%ymm1,%ymm1
-	vaesenc	%ymm18,%ymm2,%ymm2
-	vaesenc	%ymm18,%ymm3,%ymm3
-
-	vpclmulqdq	$0x10,%ymm27,%ymm4,%ymm26
-	vpternlogd	$0x96,%ymm26,%ymm25,%ymm24
-	vpclmulqdq	$0x10,%ymm28,%ymm5,%ymm25
-	vpclmulqdq	$0x10,%ymm29,%ymm6,%ymm26
-
-	vaesenc	%ymm19,%ymm0,%ymm0
-	vaesenc	%ymm19,%ymm1,%ymm1
-	vaesenc	%ymm19,%ymm2,%ymm2
-	vaesenc	%ymm19,%ymm3,%ymm3
-
-	vpternlogd	$0x96,%ymm26,%ymm25,%ymm24
-	vpclmulqdq	$0x01,%ymm10,%ymm31,%ymm26
-	vpclmulqdq	$0x10,%ymm30,%ymm7,%ymm25
-	vpxord	%ymm25,%ymm24,%ymm24
-
-	vaesenc	%ymm20,%ymm0,%ymm0
-	vaesenc	%ymm20,%ymm1,%ymm1
-	vaesenc	%ymm20,%ymm2,%ymm2
-	vaesenc	%ymm20,%ymm3,%ymm3
-
-	vpshufd	$0x4e,%ymm10,%ymm10
-	vpclmulqdq	$0x11,%ymm27,%ymm4,%ymm4
-	vpclmulqdq	$0x11,%ymm28,%ymm5,%ymm5
-	vpclmulqdq	$0x11,%ymm29,%ymm6,%ymm6
-
-	vaesenc	%ymm21,%ymm0,%ymm0
-	vaesenc	%ymm21,%ymm1,%ymm1
-	vaesenc	%ymm21,%ymm2,%ymm2
-	vaesenc	%ymm21,%ymm3,%ymm3
-
-	vpternlogd	$0x96,%ymm26,%ymm10,%ymm24
-	vpclmulqdq	$0x11,%ymm30,%ymm7,%ymm7
-	vpternlogd	$0x96,%ymm6,%ymm5,%ymm4
-	vpclmulqdq	$0x01,%ymm24,%ymm31,%ymm25
-
-	vaesenc	%ymm22,%ymm0,%ymm0
-	vaesenc	%ymm22,%ymm1,%ymm1
-	vaesenc	%ymm22,%ymm2,%ymm2
-	vaesenc	%ymm22,%ymm3,%ymm3
-
-	vpxord	%ymm7,%ymm4,%ymm10
-	vpshufd	$0x4e,%ymm24,%ymm24
-	vpternlogd	$0x96,%ymm25,%ymm24,%ymm10
-
-	vaesenc	%ymm23,%ymm0,%ymm0
-	vaesenc	%ymm23,%ymm1,%ymm1
-	vaesenc	%ymm23,%ymm2,%ymm2
-	vaesenc	%ymm23,%ymm3,%ymm3
-
-	vextracti32x4	$1,%ymm10,%xmm4
-	vpxord	%xmm4,%xmm10,%xmm10
-
-
-
-
-	vpxord	0(%rdi),%ymm14,%ymm4
-	vpxord	32(%rdi),%ymm14,%ymm5
-	vpxord	64(%rdi),%ymm14,%ymm6
-	vpxord	96(%rdi),%ymm14,%ymm7
-
-
-
-	vaesenclast	%ymm4,%ymm0,%ymm4
-	vaesenclast	%ymm5,%ymm1,%ymm5
-	vaesenclast	%ymm6,%ymm2,%ymm6
-	vaesenclast	%ymm7,%ymm3,%ymm7
-
-
-	vmovdqu8	%ymm4,0(%rsi)
-	vmovdqu8	%ymm5,32(%rsi)
-	vmovdqu8	%ymm6,64(%rsi)
-	vmovdqu8	%ymm7,96(%rsi)
-
-	subq	$-128,%rdi
-	subq	$-128,%rsi
-	addq	$-128,%rdx
-	cmpq	$128-1,%rdx
-	ja	.Lcrypt_loop_4x__func2
-.Lcrypt_loop_4x_done__func2:
-
-	testq	%rdx,%rdx
-	jz	.Ldone__func2
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-	movq	%rdx,%rax
-	negq	%rax
-	andq	$-16,%rax
-	leaq	256(%r9,%rax,1),%r8
-	vpxor	%xmm4,%xmm4,%xmm4
-	vpxor	%xmm5,%xmm5,%xmm5
-	vpxor	%xmm6,%xmm6,%xmm6
-
-	cmpq	$32,%rdx
-	jb	.Lpartial_vec__func2
-
-.Lcrypt_loop_1x__func2:
-
-
-
-	vpshufb	%ymm8,%ymm12,%ymm0
-	vpaddd	%ymm11,%ymm12,%ymm12
-	vpxord	%ymm13,%ymm0,%ymm0
-	leaq	16(%rcx),%rax
-.Lvaesenc_loop_tail_full_vec__func2:
-	vbroadcasti32x4	(%rax),%ymm9
-	vaesenc	%ymm9,%ymm0,%ymm0
-	addq	$16,%rax
-	cmpq	%rax,%r11
-	jne	.Lvaesenc_loop_tail_full_vec__func2
-	vaesenclast	%ymm14,%ymm0,%ymm0
-
-
-	vmovdqu8	(%rdi),%ymm1
-	vpxord	%ymm1,%ymm0,%ymm0
-	vmovdqu8	%ymm0,(%rsi)
-
-
-	vmovdqu8	(%r8),%ymm30
-	vpshufb	%ymm8,%ymm1,%ymm0
-	vpxord	%ymm10,%ymm0,%ymm0
-	vpclmulqdq	$0x00,%ymm30,%ymm0,%ymm7
-	vpclmulqdq	$0x01,%ymm30,%ymm0,%ymm1
-	vpclmulqdq	$0x10,%ymm30,%ymm0,%ymm2
-	vpclmulqdq	$0x11,%ymm30,%ymm0,%ymm3
-	vpxord	%ymm7,%ymm4,%ymm4
-	vpternlogd	$0x96,%ymm2,%ymm1,%ymm5
-	vpxord	%ymm3,%ymm6,%ymm6
-
-	vpxor	%xmm10,%xmm10,%xmm10
-
-	addq	$32,%r8
-	addq	$32,%rdi
-	addq	$32,%rsi
-	subq	$32,%rdx
-	cmpq	$32,%rdx
-	jae	.Lcrypt_loop_1x__func2
-
-	testq	%rdx,%rdx
-	jz	.Lreduce__func2
-
-.Lpartial_vec__func2:
-
-
-
-
-	movq	$-1,%rax
-	bzhiq	%rdx,%rax,%rax
-	kmovd	%eax,%k1
-	addq	$15,%rdx
-	andq	$-16,%rdx
-	movq	$-1,%rax
-	bzhiq	%rdx,%rax,%rax
-	kmovd	%eax,%k2
-
-
-
-	vpshufb	%ymm8,%ymm12,%ymm0
-	vpxord	%ymm13,%ymm0,%ymm0
-	leaq	16(%rcx),%rax
-.Lvaesenc_loop_tail_partialvec__func2:
-	vbroadcasti32x4	(%rax),%ymm9
-	vaesenc	%ymm9,%ymm0,%ymm0
-	addq	$16,%rax
-	cmpq	%rax,%r11
-	jne	.Lvaesenc_loop_tail_partialvec__func2
-	vaesenclast	%ymm14,%ymm0,%ymm0
-
-
-	vmovdqu8	(%rdi),%ymm1{%k1}{z}
-	vpxord	%ymm1,%ymm0,%ymm0
-	vmovdqu8	%ymm0,(%rsi){%k1}
-
-
-
-
-
-
-
-
-
-
-
-
-
-	vmovdqu8	(%r8),%ymm30{%k2}{z}
-
-	vpshufb	%ymm8,%ymm1,%ymm0
-	vpxord	%ymm10,%ymm0,%ymm0
-	vpclmulqdq	$0x00,%ymm30,%ymm0,%ymm7
-	vpclmulqdq	$0x01,%ymm30,%ymm0,%ymm1
-	vpclmulqdq	$0x10,%ymm30,%ymm0,%ymm2
-	vpclmulqdq	$0x11,%ymm30,%ymm0,%ymm3
-	vpxord	%ymm7,%ymm4,%ymm4
-	vpternlogd	$0x96,%ymm2,%ymm1,%ymm5
-	vpxord	%ymm3,%ymm6,%ymm6
-
-
-.Lreduce__func2:
-
-	vpclmulqdq	$0x01,%ymm4,%ymm31,%ymm0
-	vpshufd	$0x4e,%ymm4,%ymm4
-	vpternlogd	$0x96,%ymm0,%ymm4,%ymm5
-	vpclmulqdq	$0x01,%ymm5,%ymm31,%ymm0
-	vpshufd	$0x4e,%ymm5,%ymm5
-	vpternlogd	$0x96,%ymm0,%ymm5,%ymm6
-
-	vextracti32x4	$1,%ymm6,%xmm0
-	vpxord	%xmm0,%xmm6,%xmm10
-
-
-.Ldone__func2:
-
-	vpshufb	%xmm8,%xmm10,%xmm10
-	vmovdqu	%xmm10,(%r12)
-
-	vzeroupper
-	popq	%r12
-.cfi_adjust_cfa_offset	-8
-.cfi_restore	%r12
-	ret
-
-.cfi_endproc	
-.size	aes_gcm_dec_update_vaes_avx10_256, . - aes_gcm_dec_update_vaes_avx10_256
+.size	gcm_init_vpclmulqdq_avx10_512, . - gcm_init_vpclmulqdq_avx10_512
 .globl	gcm_ghash_vpclmulqdq_avx10_512
 .hidden gcm_ghash_vpclmulqdq_avx10_512
 .type	gcm_ghash_vpclmulqdq_avx10_512,@function
@@ -1232,7 +205,7 @@
 
 
 	cmpq	$64,%rcx
-	jb	.Laad_blockbyblock__func2
+	jb	.Laad_blockbyblock__func1
 
 
 
@@ -1243,7 +216,7 @@
 	vmovdqu8	256-64(%rsi),%zmm9
 
 	cmpq	$256-1,%rcx
-	jbe	.Laad_loop_1x__func2
+	jbe	.Laad_loop_1x__func1
 
 
 	vmovdqu8	256-256(%rsi),%zmm6
@@ -1251,7 +224,7 @@
 	vmovdqu8	256-128(%rsi),%zmm8
 
 
-.Laad_loop_4x__func2:
+.Laad_loop_4x__func1:
 	vmovdqu8	0(%rdx),%zmm0
 	vmovdqu8	64(%rdx),%zmm1
 	vmovdqu8	128(%rdx),%zmm2
@@ -1300,12 +273,12 @@
 	subq	$-256,%rdx
 	addq	$-256,%rcx
 	cmpq	$256-1,%rcx
-	ja	.Laad_loop_4x__func2
+	ja	.Laad_loop_4x__func1
 
 
 	cmpq	$64,%rcx
-	jb	.Laad_large_done__func2
-.Laad_loop_1x__func2:
+	jb	.Laad_large_done__func1
+.Laad_loop_1x__func1:
 	vmovdqu8	(%rdx),%zmm0
 	vpshufb	%zmm4,%zmm0,%zmm0
 	vpxord	%zmm0,%zmm5,%zmm5
@@ -1330,19 +303,19 @@
 	addq	$64,%rdx
 	subq	$64,%rcx
 	cmpq	$64,%rcx
-	jae	.Laad_loop_1x__func2
+	jae	.Laad_loop_1x__func1
 
-.Laad_large_done__func2:
+.Laad_large_done__func1:
 
 
 	vzeroupper
 
 
-.Laad_blockbyblock__func2:
+.Laad_blockbyblock__func1:
 	testq	%rcx,%rcx
-	jz	.Laad_done__func2
+	jz	.Laad_done__func1
 	vmovdqu	256-16(%rsi),%xmm9
-.Laad_loop_blockbyblock__func2:
+.Laad_loop_blockbyblock__func1:
 	vmovdqu	(%rdx),%xmm0
 	vpshufb	%xmm4,%xmm0,%xmm0
 	vpxor	%xmm0,%xmm5,%xmm5
@@ -1360,9 +333,9 @@
 
 	addq	$16,%rdx
 	subq	$16,%rcx
-	jnz	.Laad_loop_blockbyblock__func2
+	jnz	.Laad_loop_blockbyblock__func1
 
-.Laad_done__func2:
+.Laad_done__func1:
 
 	vpshufb	%xmm4,%xmm5,%xmm5
 	vmovdqu	%xmm5,(%rdi)
@@ -1420,7 +393,7 @@
 
 
 	cmpq	$256-1,%rdx
-	jbe	.Lcrypt_loop_4x_done__func3
+	jbe	.Lcrypt_loop_4x_done__func1
 
 
 	vmovdqu8	256-256(%r9),%zmm27
@@ -1447,7 +420,7 @@
 	vpxord	%zmm13,%zmm3,%zmm3
 
 	leaq	16(%rcx),%rax
-.Lvaesenc_loop_first_4_vecs__func3:
+.Lvaesenc_loop_first_4_vecs__func1:
 	vbroadcasti32x4	(%rax),%zmm9
 	vaesenc	%zmm9,%zmm0,%zmm0
 	vaesenc	%zmm9,%zmm1,%zmm1
@@ -1456,7 +429,7 @@
 
 	addq	$16,%rax
 	cmpq	%rax,%r11
-	jne	.Lvaesenc_loop_first_4_vecs__func3
+	jne	.Lvaesenc_loop_first_4_vecs__func1
 
 
 
@@ -1482,7 +455,7 @@
 	subq	$-256,%rsi
 	addq	$-256,%rdx
 	cmpq	$256-1,%rdx
-	jbe	.Lghash_last_ciphertext_4x__func3
+	jbe	.Lghash_last_ciphertext_4x__func1
 	vbroadcasti32x4	-144(%r11),%zmm15
 	vbroadcasti32x4	-128(%r11),%zmm16
 	vbroadcasti32x4	-112(%r11),%zmm17
@@ -1492,7 +465,7 @@
 	vbroadcasti32x4	-48(%r11),%zmm21
 	vbroadcasti32x4	-32(%r11),%zmm22
 	vbroadcasti32x4	-16(%r11),%zmm23
-.Lcrypt_loop_4x__func3:
+.Lcrypt_loop_4x__func1:
 
 
 
@@ -1512,8 +485,8 @@
 	vpxord	%zmm13,%zmm3,%zmm3
 
 	cmpl	$24,%r10d
-	jl	.Laes128__func3
-	je	.Laes192__func3
+	jl	.Laes128__func1
+	je	.Laes192__func1
 
 	vbroadcasti32x4	-208(%r11),%zmm9
 	vaesenc	%zmm9,%zmm0,%zmm0
@@ -1527,7 +500,7 @@
 	vaesenc	%zmm9,%zmm2,%zmm2
 	vaesenc	%zmm9,%zmm3,%zmm3
 
-.Laes192__func3:
+.Laes192__func1:
 	vbroadcasti32x4	-176(%r11),%zmm9
 	vaesenc	%zmm9,%zmm0,%zmm0
 	vaesenc	%zmm9,%zmm1,%zmm1
@@ -1540,7 +513,7 @@
 	vaesenc	%zmm9,%zmm2,%zmm2
 	vaesenc	%zmm9,%zmm3,%zmm3
 
-.Laes128__func3:
+.Laes128__func1:
 	vpshufb	%zmm8,%zmm4,%zmm4
 	vpxord	%zmm10,%zmm4,%zmm4
 	vpshufb	%zmm8,%zmm5,%zmm5
@@ -1661,8 +634,8 @@
 	subq	$-256,%rsi
 	addq	$-256,%rdx
 	cmpq	$256-1,%rdx
-	ja	.Lcrypt_loop_4x__func3
-.Lghash_last_ciphertext_4x__func3:
+	ja	.Lcrypt_loop_4x__func1
+.Lghash_last_ciphertext_4x__func1:
 	vpshufb	%zmm8,%zmm4,%zmm4
 	vpxord	%zmm10,%zmm4,%zmm4
 	vpshufb	%zmm8,%zmm5,%zmm5
@@ -1704,10 +677,10 @@
 	vpxord	%xmm4,%xmm10,%xmm10
 	vpternlogd	$0x96,%xmm5,%xmm6,%xmm10
 
-.Lcrypt_loop_4x_done__func3:
+.Lcrypt_loop_4x_done__func1:
 
 	testq	%rdx,%rdx
-	jz	.Ldone__func3
+	jz	.Ldone__func1
 
 
 
@@ -1737,9 +710,9 @@
 	vpxor	%xmm6,%xmm6,%xmm6
 
 	cmpq	$64,%rdx
-	jb	.Lpartial_vec__func3
+	jb	.Lpartial_vec__func1
 
-.Lcrypt_loop_1x__func3:
+.Lcrypt_loop_1x__func1:
 
 
 
@@ -1747,12 +720,12 @@
 	vpaddd	%zmm11,%zmm12,%zmm12
 	vpxord	%zmm13,%zmm0,%zmm0
 	leaq	16(%rcx),%rax
-.Lvaesenc_loop_tail_full_vec__func3:
+.Lvaesenc_loop_tail_full_vec__func1:
 	vbroadcasti32x4	(%rax),%zmm9
 	vaesenc	%zmm9,%zmm0,%zmm0
 	addq	$16,%rax
 	cmpq	%rax,%r11
-	jne	.Lvaesenc_loop_tail_full_vec__func3
+	jne	.Lvaesenc_loop_tail_full_vec__func1
 	vaesenclast	%zmm14,%zmm0,%zmm0
 
 
@@ -1779,12 +752,12 @@
 	addq	$64,%rsi
 	subq	$64,%rdx
 	cmpq	$64,%rdx
-	jae	.Lcrypt_loop_1x__func3
+	jae	.Lcrypt_loop_1x__func1
 
 	testq	%rdx,%rdx
-	jz	.Lreduce__func3
+	jz	.Lreduce__func1
 
-.Lpartial_vec__func3:
+.Lpartial_vec__func1:
 
 
 
@@ -1803,12 +776,12 @@
 	vpshufb	%zmm8,%zmm12,%zmm0
 	vpxord	%zmm13,%zmm0,%zmm0
 	leaq	16(%rcx),%rax
-.Lvaesenc_loop_tail_partialvec__func3:
+.Lvaesenc_loop_tail_partialvec__func1:
 	vbroadcasti32x4	(%rax),%zmm9
 	vaesenc	%zmm9,%zmm0,%zmm0
 	addq	$16,%rax
 	cmpq	%rax,%r11
-	jne	.Lvaesenc_loop_tail_partialvec__func3
+	jne	.Lvaesenc_loop_tail_partialvec__func1
 	vaesenclast	%zmm14,%zmm0,%zmm0
 
 
@@ -1841,7 +814,7 @@
 	vpxord	%zmm3,%zmm6,%zmm6
 
 
-.Lreduce__func3:
+.Lreduce__func1:
 
 	vpclmulqdq	$0x01,%zmm4,%zmm31,%zmm0
 	vpshufd	$0x4e,%zmm4,%zmm4
@@ -1857,7 +830,7 @@
 	vpternlogd	$0x96,%xmm1,%xmm2,%xmm10
 
 
-.Ldone__func3:
+.Ldone__func1:
 
 	vpshufb	%xmm8,%xmm10,%xmm10
 	vmovdqu	%xmm10,(%r12)
@@ -1915,7 +888,7 @@
 
 
 	cmpq	$256-1,%rdx
-	jbe	.Lcrypt_loop_4x_done__func4
+	jbe	.Lcrypt_loop_4x_done__func2
 
 
 	vmovdqu8	256-256(%r9),%zmm27
@@ -1931,7 +904,7 @@
 	vbroadcasti32x4	-48(%r11),%zmm21
 	vbroadcasti32x4	-32(%r11),%zmm22
 	vbroadcasti32x4	-16(%r11),%zmm23
-.Lcrypt_loop_4x__func4:
+.Lcrypt_loop_4x__func2:
 	vmovdqu8	0(%rdi),%zmm4
 	vmovdqu8	64(%rdi),%zmm5
 	vmovdqu8	128(%rdi),%zmm6
@@ -1955,8 +928,8 @@
 	vpxord	%zmm13,%zmm3,%zmm3
 
 	cmpl	$24,%r10d
-	jl	.Laes128__func4
-	je	.Laes192__func4
+	jl	.Laes128__func2
+	je	.Laes192__func2
 
 	vbroadcasti32x4	-208(%r11),%zmm9
 	vaesenc	%zmm9,%zmm0,%zmm0
@@ -1970,7 +943,7 @@
 	vaesenc	%zmm9,%zmm2,%zmm2
 	vaesenc	%zmm9,%zmm3,%zmm3
 
-.Laes192__func4:
+.Laes192__func2:
 	vbroadcasti32x4	-176(%r11),%zmm9
 	vaesenc	%zmm9,%zmm0,%zmm0
 	vaesenc	%zmm9,%zmm1,%zmm1
@@ -1983,7 +956,7 @@
 	vaesenc	%zmm9,%zmm2,%zmm2
 	vaesenc	%zmm9,%zmm3,%zmm3
 
-.Laes128__func4:
+.Laes128__func2:
 	vpshufb	%zmm8,%zmm4,%zmm4
 	vpxord	%zmm10,%zmm4,%zmm4
 	vpshufb	%zmm8,%zmm5,%zmm5
@@ -2104,11 +1077,11 @@
 	subq	$-256,%rsi
 	addq	$-256,%rdx
 	cmpq	$256-1,%rdx
-	ja	.Lcrypt_loop_4x__func4
-.Lcrypt_loop_4x_done__func4:
+	ja	.Lcrypt_loop_4x__func2
+.Lcrypt_loop_4x_done__func2:
 
 	testq	%rdx,%rdx
-	jz	.Ldone__func4
+	jz	.Ldone__func2
 
 
 
@@ -2138,9 +1111,9 @@
 	vpxor	%xmm6,%xmm6,%xmm6
 
 	cmpq	$64,%rdx
-	jb	.Lpartial_vec__func4
+	jb	.Lpartial_vec__func2
 
-.Lcrypt_loop_1x__func4:
+.Lcrypt_loop_1x__func2:
 
 
 
@@ -2148,12 +1121,12 @@
 	vpaddd	%zmm11,%zmm12,%zmm12
 	vpxord	%zmm13,%zmm0,%zmm0
 	leaq	16(%rcx),%rax
-.Lvaesenc_loop_tail_full_vec__func4:
+.Lvaesenc_loop_tail_full_vec__func2:
 	vbroadcasti32x4	(%rax),%zmm9
 	vaesenc	%zmm9,%zmm0,%zmm0
 	addq	$16,%rax
 	cmpq	%rax,%r11
-	jne	.Lvaesenc_loop_tail_full_vec__func4
+	jne	.Lvaesenc_loop_tail_full_vec__func2
 	vaesenclast	%zmm14,%zmm0,%zmm0
 
 
@@ -2180,12 +1153,12 @@
 	addq	$64,%rsi
 	subq	$64,%rdx
 	cmpq	$64,%rdx
-	jae	.Lcrypt_loop_1x__func4
+	jae	.Lcrypt_loop_1x__func2
 
 	testq	%rdx,%rdx
-	jz	.Lreduce__func4
+	jz	.Lreduce__func2
 
-.Lpartial_vec__func4:
+.Lpartial_vec__func2:
 
 
 
@@ -2204,12 +1177,12 @@
 	vpshufb	%zmm8,%zmm12,%zmm0
 	vpxord	%zmm13,%zmm0,%zmm0
 	leaq	16(%rcx),%rax
-.Lvaesenc_loop_tail_partialvec__func4:
+.Lvaesenc_loop_tail_partialvec__func2:
 	vbroadcasti32x4	(%rax),%zmm9
 	vaesenc	%zmm9,%zmm0,%zmm0
 	addq	$16,%rax
 	cmpq	%rax,%r11
-	jne	.Lvaesenc_loop_tail_partialvec__func4
+	jne	.Lvaesenc_loop_tail_partialvec__func2
 	vaesenclast	%zmm14,%zmm0,%zmm0
 
 
@@ -2242,7 +1215,7 @@
 	vpxord	%zmm3,%zmm6,%zmm6
 
 
-.Lreduce__func4:
+.Lreduce__func2:
 
 	vpclmulqdq	$0x01,%zmm4,%zmm31,%zmm0
 	vpshufd	$0x4e,%zmm4,%zmm4
@@ -2258,7 +1231,7 @@
 	vpternlogd	$0x96,%xmm1,%xmm2,%xmm10
 
 
-.Ldone__func4:
+.Ldone__func2:
 
 	vpshufb	%xmm8,%xmm10,%xmm10
 	vmovdqu	%xmm10,(%r12)
diff --git a/gen/bcm/aes-gcm-avx10-x86_64-win.asm b/gen/bcm/aes-gcm-avx10-x86_64-win.asm
index 258f923..fb9f896 100644
--- a/gen/bcm/aes-gcm-avx10-x86_64-win.asm
+++ b/gen/bcm/aes-gcm-avx10-x86_64-win.asm
@@ -88,15 +88,15 @@
 $L$SEH_end_gcm_gmult_vpclmulqdq_avx10_5:
 
 
-global	gcm_init_vpclmulqdq_avx10
+global	gcm_init_vpclmulqdq_avx10_512
 
 ALIGN	32
-gcm_init_vpclmulqdq_avx10:
+gcm_init_vpclmulqdq_avx10_512:
 
 
 _CET_ENDBR
 
-	lea	r8,[((256-32))+rcx]
+	lea	r8,[((256-64))+rcx]
 
 
 
@@ -124,7 +124,7 @@
 	vpternlogd	xmm3,xmm0,XMMWORD[$L$gfpoly_and_internal_carrybit],0x78
 
 
-	vbroadcasti32x4	ymm5,YMMWORD[$L$gfpoly]
+	vbroadcasti32x4	zmm5,ZMMWORD[$L$gfpoly]
 
 
 
@@ -149,16 +149,6 @@
 
 	vinserti128	ymm3,ymm4,xmm3,1
 	vinserti128	ymm4,ymm4,xmm4,1
-
-	vmovdqu8	YMMWORD[r8],ymm3
-
-
-
-
-
-	mov	eax,7
-$L$precompute_next__func1:
-	sub	r8,32
 	vpclmulqdq	ymm0,ymm3,ymm4,0x00
 	vpclmulqdq	ymm1,ymm3,ymm4,0x01
 	vpclmulqdq	ymm2,ymm3,ymm4,0x10
@@ -166,12 +156,36 @@
 	vpclmulqdq	ymm2,ymm5,ymm0,0x01
 	vpshufd	ymm0,ymm0,0x4e
 	vpternlogd	ymm1,ymm0,ymm2,0x96
-	vpclmulqdq	ymm3,ymm3,ymm4,0x11
+	vpclmulqdq	ymm4,ymm3,ymm4,0x11
 	vpclmulqdq	ymm0,ymm5,ymm1,0x01
 	vpshufd	ymm1,ymm1,0x4e
-	vpternlogd	ymm3,ymm1,ymm0,0x96
+	vpternlogd	ymm4,ymm1,ymm0,0x96
 
-	vmovdqu8	YMMWORD[r8],ymm3
+	vinserti64x4	zmm3,zmm4,ymm3,1
+	vshufi64x2	zmm4,zmm4,zmm4,0
+
+	vmovdqu8	ZMMWORD[r8],zmm3
+
+
+
+
+
+	mov	eax,3
+$L$precompute_next__func1:
+	sub	r8,64
+	vpclmulqdq	zmm0,zmm3,zmm4,0x00
+	vpclmulqdq	zmm1,zmm3,zmm4,0x01
+	vpclmulqdq	zmm2,zmm3,zmm4,0x10
+	vpxord	zmm1,zmm1,zmm2
+	vpclmulqdq	zmm2,zmm5,zmm0,0x01
+	vpshufd	zmm0,zmm0,0x4e
+	vpternlogd	zmm1,zmm0,zmm2,0x96
+	vpclmulqdq	zmm3,zmm3,zmm4,0x11
+	vpclmulqdq	zmm0,zmm5,zmm1,0x01
+	vpshufd	zmm1,zmm1,0x4e
+	vpternlogd	zmm3,zmm1,zmm0,0x96
+
+	vmovdqu8	ZMMWORD[r8],zmm3
 	dec	eax
 	jnz	NEAR $L$precompute_next__func1
 
@@ -180,1150 +194,6 @@
 
 
 
-global	gcm_ghash_vpclmulqdq_avx10_256
-
-ALIGN	32
-gcm_ghash_vpclmulqdq_avx10_256:
-
-$L$SEH_begin_gcm_ghash_vpclmulqdq_avx10_256_1:
-_CET_ENDBR
-	sub	rsp,136
-$L$SEH_prologue_gcm_ghash_vpclmulqdq_avx10_256_2:
-	movdqa	XMMWORD[rsp],xmm6
-$L$SEH_prologue_gcm_ghash_vpclmulqdq_avx10_256_3:
-	movdqa	XMMWORD[16+rsp],xmm7
-$L$SEH_prologue_gcm_ghash_vpclmulqdq_avx10_256_4:
-	movdqa	XMMWORD[32+rsp],xmm8
-$L$SEH_prologue_gcm_ghash_vpclmulqdq_avx10_256_5:
-	movdqa	XMMWORD[48+rsp],xmm9
-$L$SEH_prologue_gcm_ghash_vpclmulqdq_avx10_256_6:
-	movdqa	XMMWORD[64+rsp],xmm10
-$L$SEH_prologue_gcm_ghash_vpclmulqdq_avx10_256_7:
-	movdqa	XMMWORD[80+rsp],xmm11
-$L$SEH_prologue_gcm_ghash_vpclmulqdq_avx10_256_8:
-	movdqa	XMMWORD[96+rsp],xmm12
-$L$SEH_prologue_gcm_ghash_vpclmulqdq_avx10_256_9:
-	movdqa	XMMWORD[112+rsp],xmm13
-$L$SEH_prologue_gcm_ghash_vpclmulqdq_avx10_256_10:
-
-$L$SEH_endprologue_gcm_ghash_vpclmulqdq_avx10_256_11:
-
-
-
-
-	vmovdqu	xmm4,XMMWORD[$L$bswap_mask]
-	vmovdqu	xmm10,XMMWORD[$L$gfpoly]
-
-
-	vmovdqu	xmm5,XMMWORD[rcx]
-	vpshufb	xmm5,xmm5,xmm4
-
-
-	cmp	r9,32
-	jb	NEAR $L$aad_blockbyblock__func1
-
-
-
-	vshufi64x2	ymm4,ymm4,ymm4,0
-	vshufi64x2	ymm10,ymm10,ymm10,0
-
-
-	vmovdqu8	ymm9,YMMWORD[((256-32))+rdx]
-
-	cmp	r9,4*32-1
-	jbe	NEAR $L$aad_loop_1x__func1
-
-
-	vmovdqu8	ymm6,YMMWORD[((256-128))+rdx]
-	vmovdqu8	ymm7,YMMWORD[((256-96))+rdx]
-	vmovdqu8	ymm8,YMMWORD[((256-64))+rdx]
-
-
-$L$aad_loop_4x__func1:
-	vmovdqu8	ymm0,YMMWORD[r8]
-	vmovdqu8	ymm1,YMMWORD[32+r8]
-	vmovdqu8	ymm2,YMMWORD[64+r8]
-	vmovdqu8	ymm3,YMMWORD[96+r8]
-	vpshufb	ymm0,ymm0,ymm4
-	vpxord	ymm0,ymm0,ymm5
-	vpshufb	ymm1,ymm1,ymm4
-	vpshufb	ymm2,ymm2,ymm4
-	vpshufb	ymm3,ymm3,ymm4
-	vpclmulqdq	ymm5,ymm0,ymm6,0x00
-	vpclmulqdq	ymm11,ymm1,ymm7,0x00
-	vpclmulqdq	ymm12,ymm2,ymm8,0x00
-	vpxord	ymm5,ymm5,ymm11
-	vpclmulqdq	ymm13,ymm3,ymm9,0x00
-	vpternlogd	ymm5,ymm12,ymm13,0x96
-	vpclmulqdq	ymm11,ymm0,ymm6,0x01
-	vpclmulqdq	ymm12,ymm1,ymm7,0x01
-	vpclmulqdq	ymm13,ymm2,ymm8,0x01
-	vpternlogd	ymm11,ymm12,ymm13,0x96
-	vpclmulqdq	ymm12,ymm3,ymm9,0x01
-	vpclmulqdq	ymm13,ymm0,ymm6,0x10
-	vpternlogd	ymm11,ymm12,ymm13,0x96
-	vpclmulqdq	ymm12,ymm1,ymm7,0x10
-	vpclmulqdq	ymm13,ymm2,ymm8,0x10
-	vpternlogd	ymm11,ymm12,ymm13,0x96
-	vpclmulqdq	ymm13,ymm10,ymm5,0x01
-	vpclmulqdq	ymm12,ymm3,ymm9,0x10
-	vpxord	ymm11,ymm11,ymm12
-	vpshufd	ymm5,ymm5,0x4e
-	vpclmulqdq	ymm0,ymm0,ymm6,0x11
-	vpclmulqdq	ymm1,ymm1,ymm7,0x11
-	vpclmulqdq	ymm2,ymm2,ymm8,0x11
-	vpternlogd	ymm11,ymm5,ymm13,0x96
-	vpclmulqdq	ymm3,ymm3,ymm9,0x11
-	vpternlogd	ymm0,ymm1,ymm2,0x96
-	vpclmulqdq	ymm12,ymm10,ymm11,0x01
-	vpxord	ymm5,ymm0,ymm3
-	vpshufd	ymm11,ymm11,0x4e
-	vpternlogd	ymm5,ymm11,ymm12,0x96
-	vextracti32x4	xmm0,ymm5,1
-	vpxord	xmm5,xmm5,xmm0
-
-	sub	r8,-4*32
-	add	r9,-4*32
-	cmp	r9,4*32-1
-	ja	NEAR $L$aad_loop_4x__func1
-
-
-	cmp	r9,32
-	jb	NEAR $L$aad_large_done__func1
-$L$aad_loop_1x__func1:
-	vmovdqu8	ymm0,YMMWORD[r8]
-	vpshufb	ymm0,ymm0,ymm4
-	vpxord	ymm5,ymm5,ymm0
-	vpclmulqdq	ymm0,ymm5,ymm9,0x00
-	vpclmulqdq	ymm1,ymm5,ymm9,0x01
-	vpclmulqdq	ymm2,ymm5,ymm9,0x10
-	vpxord	ymm1,ymm1,ymm2
-	vpclmulqdq	ymm2,ymm10,ymm0,0x01
-	vpshufd	ymm0,ymm0,0x4e
-	vpternlogd	ymm1,ymm0,ymm2,0x96
-	vpclmulqdq	ymm5,ymm5,ymm9,0x11
-	vpclmulqdq	ymm0,ymm10,ymm1,0x01
-	vpshufd	ymm1,ymm1,0x4e
-	vpternlogd	ymm5,ymm1,ymm0,0x96
-
-	vextracti32x4	xmm0,ymm5,1
-	vpxord	xmm5,xmm5,xmm0
-
-	add	r8,32
-	sub	r9,32
-	cmp	r9,32
-	jae	NEAR $L$aad_loop_1x__func1
-
-$L$aad_large_done__func1:
-
-
-	vzeroupper
-
-
-$L$aad_blockbyblock__func1:
-	test	r9,r9
-	jz	NEAR $L$aad_done__func1
-	vmovdqu	xmm9,XMMWORD[((256-16))+rdx]
-$L$aad_loop_blockbyblock__func1:
-	vmovdqu	xmm0,XMMWORD[r8]
-	vpshufb	xmm0,xmm0,xmm4
-	vpxor	xmm5,xmm5,xmm0
-	vpclmulqdq	xmm0,xmm5,xmm9,0x00
-	vpclmulqdq	xmm1,xmm5,xmm9,0x01
-	vpclmulqdq	xmm2,xmm5,xmm9,0x10
-	vpxord	xmm1,xmm1,xmm2
-	vpclmulqdq	xmm2,xmm10,xmm0,0x01
-	vpshufd	xmm0,xmm0,0x4e
-	vpternlogd	xmm1,xmm0,xmm2,0x96
-	vpclmulqdq	xmm5,xmm5,xmm9,0x11
-	vpclmulqdq	xmm0,xmm10,xmm1,0x01
-	vpshufd	xmm1,xmm1,0x4e
-	vpternlogd	xmm5,xmm1,xmm0,0x96
-
-	add	r8,16
-	sub	r9,16
-	jnz	NEAR $L$aad_loop_blockbyblock__func1
-
-$L$aad_done__func1:
-
-	vpshufb	xmm5,xmm5,xmm4
-	vmovdqu	XMMWORD[rcx],xmm5
-	movdqa	xmm6,XMMWORD[rsp]
-	movdqa	xmm7,XMMWORD[16+rsp]
-	movdqa	xmm8,XMMWORD[32+rsp]
-	movdqa	xmm9,XMMWORD[48+rsp]
-	movdqa	xmm10,XMMWORD[64+rsp]
-	movdqa	xmm11,XMMWORD[80+rsp]
-	movdqa	xmm12,XMMWORD[96+rsp]
-	movdqa	xmm13,XMMWORD[112+rsp]
-	add	rsp,136
-	ret
-$L$SEH_end_gcm_ghash_vpclmulqdq_avx10_256_12:
-
-
-global	aes_gcm_enc_update_vaes_avx10_256
-
-ALIGN	32
-aes_gcm_enc_update_vaes_avx10_256:
-
-$L$SEH_begin_aes_gcm_enc_update_vaes_avx10_256_1:
-_CET_ENDBR
-	push	rsi
-$L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_256_2:
-	push	rdi
-$L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_256_3:
-	push	r12
-$L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_256_4:
-
-	mov	rsi,QWORD[64+rsp]
-	mov	rdi,QWORD[72+rsp]
-	mov	r12,QWORD[80+rsp]
-	sub	rsp,160
-$L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_256_5:
-	movdqa	XMMWORD[rsp],xmm6
-$L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_256_6:
-	movdqa	XMMWORD[16+rsp],xmm7
-$L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_256_7:
-	movdqa	XMMWORD[32+rsp],xmm8
-$L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_256_8:
-	movdqa	XMMWORD[48+rsp],xmm9
-$L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_256_9:
-	movdqa	XMMWORD[64+rsp],xmm10
-$L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_256_10:
-	movdqa	XMMWORD[80+rsp],xmm11
-$L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_256_11:
-	movdqa	XMMWORD[96+rsp],xmm12
-$L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_256_12:
-	movdqa	XMMWORD[112+rsp],xmm13
-$L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_256_13:
-	movdqa	XMMWORD[128+rsp],xmm14
-$L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_256_14:
-	movdqa	XMMWORD[144+rsp],xmm15
-$L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_256_15:
-
-$L$SEH_endprologue_aes_gcm_enc_update_vaes_avx10_256_16:
-%ifdef BORINGSSL_DISPATCH_TEST
-EXTERN	BORINGSSL_function_hit
-	mov	BYTE[((BORINGSSL_function_hit+6))],1
-%endif
-
-	vbroadcasti32x4	ymm8,YMMWORD[$L$bswap_mask]
-	vbroadcasti32x4	ymm31,YMMWORD[$L$gfpoly]
-
-
-
-	vmovdqu	xmm10,XMMWORD[r12]
-	vpshufb	xmm10,xmm10,xmm8
-	vbroadcasti32x4	ymm12,YMMWORD[rsi]
-	vpshufb	ymm12,ymm12,ymm8
-
-
-
-	mov	r10d,DWORD[240+r9]
-	lea	r10d,[((-20))+r10*4]
-
-
-
-
-	lea	r11,[96+r10*4+r9]
-	vbroadcasti32x4	ymm13,YMMWORD[r9]
-	vbroadcasti32x4	ymm14,YMMWORD[r11]
-
-
-	vpaddd	ymm12,ymm12,YMMWORD[$L$ctr_pattern]
-
-
-	vbroadcasti32x4	ymm11,YMMWORD[$L$inc_2blocks]
-
-
-
-	cmp	r8,4*32-1
-	jbe	NEAR $L$crypt_loop_4x_done__func1
-
-
-	vmovdqu8	ymm27,YMMWORD[((256-128))+rdi]
-	vmovdqu8	ymm28,YMMWORD[((256-96))+rdi]
-	vmovdqu8	ymm29,YMMWORD[((256-64))+rdi]
-	vmovdqu8	ymm30,YMMWORD[((256-32))+rdi]
-
-
-
-
-	vpshufb	ymm0,ymm12,ymm8
-	vpaddd	ymm12,ymm12,ymm11
-	vpshufb	ymm1,ymm12,ymm8
-	vpaddd	ymm12,ymm12,ymm11
-	vpshufb	ymm2,ymm12,ymm8
-	vpaddd	ymm12,ymm12,ymm11
-	vpshufb	ymm3,ymm12,ymm8
-	vpaddd	ymm12,ymm12,ymm11
-
-
-	vpxord	ymm0,ymm0,ymm13
-	vpxord	ymm1,ymm1,ymm13
-	vpxord	ymm2,ymm2,ymm13
-	vpxord	ymm3,ymm3,ymm13
-
-	lea	rax,[16+r9]
-$L$vaesenc_loop_first_4_vecs__func1:
-	vbroadcasti32x4	ymm9,YMMWORD[rax]
-	vaesenc	ymm0,ymm0,ymm9
-	vaesenc	ymm1,ymm1,ymm9
-	vaesenc	ymm2,ymm2,ymm9
-	vaesenc	ymm3,ymm3,ymm9
-
-	add	rax,16
-	cmp	r11,rax
-	jne	NEAR $L$vaesenc_loop_first_4_vecs__func1
-
-
-
-	vpxord	ymm4,ymm14,YMMWORD[rcx]
-	vpxord	ymm5,ymm14,YMMWORD[32+rcx]
-	vpxord	ymm6,ymm14,YMMWORD[64+rcx]
-	vpxord	ymm7,ymm14,YMMWORD[96+rcx]
-
-
-
-	vaesenclast	ymm4,ymm0,ymm4
-	vaesenclast	ymm5,ymm1,ymm5
-	vaesenclast	ymm6,ymm2,ymm6
-	vaesenclast	ymm7,ymm3,ymm7
-
-
-	vmovdqu8	YMMWORD[rdx],ymm4
-	vmovdqu8	YMMWORD[32+rdx],ymm5
-	vmovdqu8	YMMWORD[64+rdx],ymm6
-	vmovdqu8	YMMWORD[96+rdx],ymm7
-
-	sub	rcx,-4*32
-	sub	rdx,-4*32
-	add	r8,-4*32
-	cmp	r8,4*32-1
-	jbe	NEAR $L$ghash_last_ciphertext_4x__func1
-	vbroadcasti32x4	ymm15,YMMWORD[((-144))+r11]
-	vbroadcasti32x4	ymm16,YMMWORD[((-128))+r11]
-	vbroadcasti32x4	ymm17,YMMWORD[((-112))+r11]
-	vbroadcasti32x4	ymm18,YMMWORD[((-96))+r11]
-	vbroadcasti32x4	ymm19,YMMWORD[((-80))+r11]
-	vbroadcasti32x4	ymm20,YMMWORD[((-64))+r11]
-	vbroadcasti32x4	ymm21,YMMWORD[((-48))+r11]
-	vbroadcasti32x4	ymm22,YMMWORD[((-32))+r11]
-	vbroadcasti32x4	ymm23,YMMWORD[((-16))+r11]
-$L$crypt_loop_4x__func1:
-
-
-
-	vpshufb	ymm0,ymm12,ymm8
-	vpaddd	ymm12,ymm12,ymm11
-	vpshufb	ymm1,ymm12,ymm8
-	vpaddd	ymm12,ymm12,ymm11
-	vpshufb	ymm2,ymm12,ymm8
-	vpaddd	ymm12,ymm12,ymm11
-	vpshufb	ymm3,ymm12,ymm8
-	vpaddd	ymm12,ymm12,ymm11
-
-
-	vpxord	ymm0,ymm0,ymm13
-	vpxord	ymm1,ymm1,ymm13
-	vpxord	ymm2,ymm2,ymm13
-	vpxord	ymm3,ymm3,ymm13
-
-	cmp	r10d,24
-	jl	NEAR $L$aes128__func1
-	je	NEAR $L$aes192__func1
-
-	vbroadcasti32x4	ymm9,YMMWORD[((-208))+r11]
-	vaesenc	ymm0,ymm0,ymm9
-	vaesenc	ymm1,ymm1,ymm9
-	vaesenc	ymm2,ymm2,ymm9
-	vaesenc	ymm3,ymm3,ymm9
-
-	vbroadcasti32x4	ymm9,YMMWORD[((-192))+r11]
-	vaesenc	ymm0,ymm0,ymm9
-	vaesenc	ymm1,ymm1,ymm9
-	vaesenc	ymm2,ymm2,ymm9
-	vaesenc	ymm3,ymm3,ymm9
-
-$L$aes192__func1:
-	vbroadcasti32x4	ymm9,YMMWORD[((-176))+r11]
-	vaesenc	ymm0,ymm0,ymm9
-	vaesenc	ymm1,ymm1,ymm9
-	vaesenc	ymm2,ymm2,ymm9
-	vaesenc	ymm3,ymm3,ymm9
-
-	vbroadcasti32x4	ymm9,YMMWORD[((-160))+r11]
-	vaesenc	ymm0,ymm0,ymm9
-	vaesenc	ymm1,ymm1,ymm9
-	vaesenc	ymm2,ymm2,ymm9
-	vaesenc	ymm3,ymm3,ymm9
-
-$L$aes128__func1:
-	vpshufb	ymm4,ymm4,ymm8
-	vpxord	ymm4,ymm4,ymm10
-	vpshufb	ymm5,ymm5,ymm8
-	vpshufb	ymm6,ymm6,ymm8
-
-	vaesenc	ymm0,ymm0,ymm15
-	vaesenc	ymm1,ymm1,ymm15
-	vaesenc	ymm2,ymm2,ymm15
-	vaesenc	ymm3,ymm3,ymm15
-
-	vpshufb	ymm7,ymm7,ymm8
-	vpclmulqdq	ymm10,ymm4,ymm27,0x00
-	vpclmulqdq	ymm24,ymm5,ymm28,0x00
-	vpclmulqdq	ymm25,ymm6,ymm29,0x00
-
-	vaesenc	ymm0,ymm0,ymm16
-	vaesenc	ymm1,ymm1,ymm16
-	vaesenc	ymm2,ymm2,ymm16
-	vaesenc	ymm3,ymm3,ymm16
-
-	vpxord	ymm10,ymm10,ymm24
-	vpclmulqdq	ymm26,ymm7,ymm30,0x00
-	vpternlogd	ymm10,ymm25,ymm26,0x96
-	vpclmulqdq	ymm24,ymm4,ymm27,0x01
-
-	vaesenc	ymm0,ymm0,ymm17
-	vaesenc	ymm1,ymm1,ymm17
-	vaesenc	ymm2,ymm2,ymm17
-	vaesenc	ymm3,ymm3,ymm17
-
-	vpclmulqdq	ymm25,ymm5,ymm28,0x01
-	vpclmulqdq	ymm26,ymm6,ymm29,0x01
-	vpternlogd	ymm24,ymm25,ymm26,0x96
-	vpclmulqdq	ymm25,ymm7,ymm30,0x01
-
-	vaesenc	ymm0,ymm0,ymm18
-	vaesenc	ymm1,ymm1,ymm18
-	vaesenc	ymm2,ymm2,ymm18
-	vaesenc	ymm3,ymm3,ymm18
-
-	vpclmulqdq	ymm26,ymm4,ymm27,0x10
-	vpternlogd	ymm24,ymm25,ymm26,0x96
-	vpclmulqdq	ymm25,ymm5,ymm28,0x10
-	vpclmulqdq	ymm26,ymm6,ymm29,0x10
-
-	vaesenc	ymm0,ymm0,ymm19
-	vaesenc	ymm1,ymm1,ymm19
-	vaesenc	ymm2,ymm2,ymm19
-	vaesenc	ymm3,ymm3,ymm19
-
-	vpternlogd	ymm24,ymm25,ymm26,0x96
-	vpclmulqdq	ymm26,ymm31,ymm10,0x01
-	vpclmulqdq	ymm25,ymm7,ymm30,0x10
-	vpxord	ymm24,ymm24,ymm25
-
-	vaesenc	ymm0,ymm0,ymm20
-	vaesenc	ymm1,ymm1,ymm20
-	vaesenc	ymm2,ymm2,ymm20
-	vaesenc	ymm3,ymm3,ymm20
-
-	vpshufd	ymm10,ymm10,0x4e
-	vpclmulqdq	ymm4,ymm4,ymm27,0x11
-	vpclmulqdq	ymm5,ymm5,ymm28,0x11
-	vpclmulqdq	ymm6,ymm6,ymm29,0x11
-
-	vaesenc	ymm0,ymm0,ymm21
-	vaesenc	ymm1,ymm1,ymm21
-	vaesenc	ymm2,ymm2,ymm21
-	vaesenc	ymm3,ymm3,ymm21
-
-	vpternlogd	ymm24,ymm10,ymm26,0x96
-	vpclmulqdq	ymm7,ymm7,ymm30,0x11
-	vpternlogd	ymm4,ymm5,ymm6,0x96
-	vpclmulqdq	ymm25,ymm31,ymm24,0x01
-
-	vaesenc	ymm0,ymm0,ymm22
-	vaesenc	ymm1,ymm1,ymm22
-	vaesenc	ymm2,ymm2,ymm22
-	vaesenc	ymm3,ymm3,ymm22
-
-	vpxord	ymm10,ymm4,ymm7
-	vpshufd	ymm24,ymm24,0x4e
-	vpternlogd	ymm10,ymm24,ymm25,0x96
-
-	vaesenc	ymm0,ymm0,ymm23
-	vaesenc	ymm1,ymm1,ymm23
-	vaesenc	ymm2,ymm2,ymm23
-	vaesenc	ymm3,ymm3,ymm23
-
-	vextracti32x4	xmm4,ymm10,1
-	vpxord	xmm10,xmm10,xmm4
-
-
-
-
-	vpxord	ymm4,ymm14,YMMWORD[rcx]
-	vpxord	ymm5,ymm14,YMMWORD[32+rcx]
-	vpxord	ymm6,ymm14,YMMWORD[64+rcx]
-	vpxord	ymm7,ymm14,YMMWORD[96+rcx]
-
-
-
-	vaesenclast	ymm4,ymm0,ymm4
-	vaesenclast	ymm5,ymm1,ymm5
-	vaesenclast	ymm6,ymm2,ymm6
-	vaesenclast	ymm7,ymm3,ymm7
-
-
-	vmovdqu8	YMMWORD[rdx],ymm4
-	vmovdqu8	YMMWORD[32+rdx],ymm5
-	vmovdqu8	YMMWORD[64+rdx],ymm6
-	vmovdqu8	YMMWORD[96+rdx],ymm7
-
-	sub	rcx,-4*32
-	sub	rdx,-4*32
-	add	r8,-4*32
-	cmp	r8,4*32-1
-	ja	NEAR $L$crypt_loop_4x__func1
-$L$ghash_last_ciphertext_4x__func1:
-	vpshufb	ymm4,ymm4,ymm8
-	vpxord	ymm4,ymm4,ymm10
-	vpshufb	ymm5,ymm5,ymm8
-	vpshufb	ymm6,ymm6,ymm8
-	vpshufb	ymm7,ymm7,ymm8
-	vpclmulqdq	ymm10,ymm4,ymm27,0x00
-	vpclmulqdq	ymm24,ymm5,ymm28,0x00
-	vpclmulqdq	ymm25,ymm6,ymm29,0x00
-	vpxord	ymm10,ymm10,ymm24
-	vpclmulqdq	ymm26,ymm7,ymm30,0x00
-	vpternlogd	ymm10,ymm25,ymm26,0x96
-	vpclmulqdq	ymm24,ymm4,ymm27,0x01
-	vpclmulqdq	ymm25,ymm5,ymm28,0x01
-	vpclmulqdq	ymm26,ymm6,ymm29,0x01
-	vpternlogd	ymm24,ymm25,ymm26,0x96
-	vpclmulqdq	ymm25,ymm7,ymm30,0x01
-	vpclmulqdq	ymm26,ymm4,ymm27,0x10
-	vpternlogd	ymm24,ymm25,ymm26,0x96
-	vpclmulqdq	ymm25,ymm5,ymm28,0x10
-	vpclmulqdq	ymm26,ymm6,ymm29,0x10
-	vpternlogd	ymm24,ymm25,ymm26,0x96
-	vpclmulqdq	ymm26,ymm31,ymm10,0x01
-	vpclmulqdq	ymm25,ymm7,ymm30,0x10
-	vpxord	ymm24,ymm24,ymm25
-	vpshufd	ymm10,ymm10,0x4e
-	vpclmulqdq	ymm4,ymm4,ymm27,0x11
-	vpclmulqdq	ymm5,ymm5,ymm28,0x11
-	vpclmulqdq	ymm6,ymm6,ymm29,0x11
-	vpternlogd	ymm24,ymm10,ymm26,0x96
-	vpclmulqdq	ymm7,ymm7,ymm30,0x11
-	vpternlogd	ymm4,ymm5,ymm6,0x96
-	vpclmulqdq	ymm25,ymm31,ymm24,0x01
-	vpxord	ymm10,ymm4,ymm7
-	vpshufd	ymm24,ymm24,0x4e
-	vpternlogd	ymm10,ymm24,ymm25,0x96
-	vextracti32x4	xmm4,ymm10,1
-	vpxord	xmm10,xmm10,xmm4
-
-$L$crypt_loop_4x_done__func1:
-
-	test	r8,r8
-	jz	NEAR $L$done__func1
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-	mov	rax,r8
-	neg	rax
-	and	rax,-16
-	lea	rsi,[256+rax*1+rdi]
-	vpxor	xmm4,xmm4,xmm4
-	vpxor	xmm5,xmm5,xmm5
-	vpxor	xmm6,xmm6,xmm6
-
-	cmp	r8,32
-	jb	NEAR $L$partial_vec__func1
-
-$L$crypt_loop_1x__func1:
-
-
-
-	vpshufb	ymm0,ymm12,ymm8
-	vpaddd	ymm12,ymm12,ymm11
-	vpxord	ymm0,ymm0,ymm13
-	lea	rax,[16+r9]
-$L$vaesenc_loop_tail_full_vec__func1:
-	vbroadcasti32x4	ymm9,YMMWORD[rax]
-	vaesenc	ymm0,ymm0,ymm9
-	add	rax,16
-	cmp	r11,rax
-	jne	NEAR $L$vaesenc_loop_tail_full_vec__func1
-	vaesenclast	ymm0,ymm0,ymm14
-
-
-	vmovdqu8	ymm1,YMMWORD[rcx]
-	vpxord	ymm0,ymm0,ymm1
-	vmovdqu8	YMMWORD[rdx],ymm0
-
-
-	vmovdqu8	ymm30,YMMWORD[rsi]
-	vpshufb	ymm0,ymm0,ymm8
-	vpxord	ymm0,ymm0,ymm10
-	vpclmulqdq	ymm7,ymm0,ymm30,0x00
-	vpclmulqdq	ymm1,ymm0,ymm30,0x01
-	vpclmulqdq	ymm2,ymm0,ymm30,0x10
-	vpclmulqdq	ymm3,ymm0,ymm30,0x11
-	vpxord	ymm4,ymm4,ymm7
-	vpternlogd	ymm5,ymm1,ymm2,0x96
-	vpxord	ymm6,ymm6,ymm3
-
-	vpxor	xmm10,xmm10,xmm10
-
-	add	rsi,32
-	add	rcx,32
-	add	rdx,32
-	sub	r8,32
-	cmp	r8,32
-	jae	NEAR $L$crypt_loop_1x__func1
-
-	test	r8,r8
-	jz	NEAR $L$reduce__func1
-
-$L$partial_vec__func1:
-
-
-
-
-	mov	rax,-1
-	bzhi	rax,rax,r8
-	kmovd	k1,eax
-	add	r8,15
-	and	r8,-16
-	mov	rax,-1
-	bzhi	rax,rax,r8
-	kmovd	k2,eax
-
-
-
-	vpshufb	ymm0,ymm12,ymm8
-	vpxord	ymm0,ymm0,ymm13
-	lea	rax,[16+r9]
-$L$vaesenc_loop_tail_partialvec__func1:
-	vbroadcasti32x4	ymm9,YMMWORD[rax]
-	vaesenc	ymm0,ymm0,ymm9
-	add	rax,16
-	cmp	r11,rax
-	jne	NEAR $L$vaesenc_loop_tail_partialvec__func1
-	vaesenclast	ymm0,ymm0,ymm14
-
-
-	vmovdqu8	ymm1{k1}{z},[rcx]
-	vpxord	ymm0,ymm0,ymm1
-	vmovdqu8	YMMWORD[rdx]{k1},ymm0
-
-
-
-
-
-
-
-
-
-
-
-
-
-	vmovdqu8	ymm30{k2}{z},[rsi]
-	vmovdqu8	ymm1{k1}{z},ymm0
-	vpshufb	ymm0,ymm1,ymm8
-	vpxord	ymm0,ymm0,ymm10
-	vpclmulqdq	ymm7,ymm0,ymm30,0x00
-	vpclmulqdq	ymm1,ymm0,ymm30,0x01
-	vpclmulqdq	ymm2,ymm0,ymm30,0x10
-	vpclmulqdq	ymm3,ymm0,ymm30,0x11
-	vpxord	ymm4,ymm4,ymm7
-	vpternlogd	ymm5,ymm1,ymm2,0x96
-	vpxord	ymm6,ymm6,ymm3
-
-
-$L$reduce__func1:
-
-	vpclmulqdq	ymm0,ymm31,ymm4,0x01
-	vpshufd	ymm4,ymm4,0x4e
-	vpternlogd	ymm5,ymm4,ymm0,0x96
-	vpclmulqdq	ymm0,ymm31,ymm5,0x01
-	vpshufd	ymm5,ymm5,0x4e
-	vpternlogd	ymm6,ymm5,ymm0,0x96
-
-	vextracti32x4	xmm0,ymm6,1
-	vpxord	xmm10,xmm6,xmm0
-
-
-$L$done__func1:
-
-	vpshufb	xmm10,xmm10,xmm8
-	vmovdqu	XMMWORD[r12],xmm10
-
-	vzeroupper
-	movdqa	xmm6,XMMWORD[rsp]
-	movdqa	xmm7,XMMWORD[16+rsp]
-	movdqa	xmm8,XMMWORD[32+rsp]
-	movdqa	xmm9,XMMWORD[48+rsp]
-	movdqa	xmm10,XMMWORD[64+rsp]
-	movdqa	xmm11,XMMWORD[80+rsp]
-	movdqa	xmm12,XMMWORD[96+rsp]
-	movdqa	xmm13,XMMWORD[112+rsp]
-	movdqa	xmm14,XMMWORD[128+rsp]
-	movdqa	xmm15,XMMWORD[144+rsp]
-	add	rsp,160
-	pop	r12
-	pop	rdi
-	pop	rsi
-	ret
-$L$SEH_end_aes_gcm_enc_update_vaes_avx10_256_17:
-
-
-global	aes_gcm_dec_update_vaes_avx10_256
-
-ALIGN	32
-aes_gcm_dec_update_vaes_avx10_256:
-
-$L$SEH_begin_aes_gcm_dec_update_vaes_avx10_256_1:
-_CET_ENDBR
-	push	rsi
-$L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_256_2:
-	push	rdi
-$L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_256_3:
-	push	r12
-$L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_256_4:
-
-	mov	rsi,QWORD[64+rsp]
-	mov	rdi,QWORD[72+rsp]
-	mov	r12,QWORD[80+rsp]
-	sub	rsp,160
-$L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_256_5:
-	movdqa	XMMWORD[rsp],xmm6
-$L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_256_6:
-	movdqa	XMMWORD[16+rsp],xmm7
-$L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_256_7:
-	movdqa	XMMWORD[32+rsp],xmm8
-$L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_256_8:
-	movdqa	XMMWORD[48+rsp],xmm9
-$L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_256_9:
-	movdqa	XMMWORD[64+rsp],xmm10
-$L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_256_10:
-	movdqa	XMMWORD[80+rsp],xmm11
-$L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_256_11:
-	movdqa	XMMWORD[96+rsp],xmm12
-$L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_256_12:
-	movdqa	XMMWORD[112+rsp],xmm13
-$L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_256_13:
-	movdqa	XMMWORD[128+rsp],xmm14
-$L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_256_14:
-	movdqa	XMMWORD[144+rsp],xmm15
-$L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_256_15:
-
-$L$SEH_endprologue_aes_gcm_dec_update_vaes_avx10_256_16:
-
-	vbroadcasti32x4	ymm8,YMMWORD[$L$bswap_mask]
-	vbroadcasti32x4	ymm31,YMMWORD[$L$gfpoly]
-
-
-
-	vmovdqu	xmm10,XMMWORD[r12]
-	vpshufb	xmm10,xmm10,xmm8
-	vbroadcasti32x4	ymm12,YMMWORD[rsi]
-	vpshufb	ymm12,ymm12,ymm8
-
-
-
-	mov	r10d,DWORD[240+r9]
-	lea	r10d,[((-20))+r10*4]
-
-
-
-
-	lea	r11,[96+r10*4+r9]
-	vbroadcasti32x4	ymm13,YMMWORD[r9]
-	vbroadcasti32x4	ymm14,YMMWORD[r11]
-
-
-	vpaddd	ymm12,ymm12,YMMWORD[$L$ctr_pattern]
-
-
-	vbroadcasti32x4	ymm11,YMMWORD[$L$inc_2blocks]
-
-
-
-	cmp	r8,4*32-1
-	jbe	NEAR $L$crypt_loop_4x_done__func2
-
-
-	vmovdqu8	ymm27,YMMWORD[((256-128))+rdi]
-	vmovdqu8	ymm28,YMMWORD[((256-96))+rdi]
-	vmovdqu8	ymm29,YMMWORD[((256-64))+rdi]
-	vmovdqu8	ymm30,YMMWORD[((256-32))+rdi]
-	vbroadcasti32x4	ymm15,YMMWORD[((-144))+r11]
-	vbroadcasti32x4	ymm16,YMMWORD[((-128))+r11]
-	vbroadcasti32x4	ymm17,YMMWORD[((-112))+r11]
-	vbroadcasti32x4	ymm18,YMMWORD[((-96))+r11]
-	vbroadcasti32x4	ymm19,YMMWORD[((-80))+r11]
-	vbroadcasti32x4	ymm20,YMMWORD[((-64))+r11]
-	vbroadcasti32x4	ymm21,YMMWORD[((-48))+r11]
-	vbroadcasti32x4	ymm22,YMMWORD[((-32))+r11]
-	vbroadcasti32x4	ymm23,YMMWORD[((-16))+r11]
-$L$crypt_loop_4x__func2:
-	vmovdqu8	ymm4,YMMWORD[rcx]
-	vmovdqu8	ymm5,YMMWORD[32+rcx]
-	vmovdqu8	ymm6,YMMWORD[64+rcx]
-	vmovdqu8	ymm7,YMMWORD[96+rcx]
-
-
-
-	vpshufb	ymm0,ymm12,ymm8
-	vpaddd	ymm12,ymm12,ymm11
-	vpshufb	ymm1,ymm12,ymm8
-	vpaddd	ymm12,ymm12,ymm11
-	vpshufb	ymm2,ymm12,ymm8
-	vpaddd	ymm12,ymm12,ymm11
-	vpshufb	ymm3,ymm12,ymm8
-	vpaddd	ymm12,ymm12,ymm11
-
-
-	vpxord	ymm0,ymm0,ymm13
-	vpxord	ymm1,ymm1,ymm13
-	vpxord	ymm2,ymm2,ymm13
-	vpxord	ymm3,ymm3,ymm13
-
-	cmp	r10d,24
-	jl	NEAR $L$aes128__func2
-	je	NEAR $L$aes192__func2
-
-	vbroadcasti32x4	ymm9,YMMWORD[((-208))+r11]
-	vaesenc	ymm0,ymm0,ymm9
-	vaesenc	ymm1,ymm1,ymm9
-	vaesenc	ymm2,ymm2,ymm9
-	vaesenc	ymm3,ymm3,ymm9
-
-	vbroadcasti32x4	ymm9,YMMWORD[((-192))+r11]
-	vaesenc	ymm0,ymm0,ymm9
-	vaesenc	ymm1,ymm1,ymm9
-	vaesenc	ymm2,ymm2,ymm9
-	vaesenc	ymm3,ymm3,ymm9
-
-$L$aes192__func2:
-	vbroadcasti32x4	ymm9,YMMWORD[((-176))+r11]
-	vaesenc	ymm0,ymm0,ymm9
-	vaesenc	ymm1,ymm1,ymm9
-	vaesenc	ymm2,ymm2,ymm9
-	vaesenc	ymm3,ymm3,ymm9
-
-	vbroadcasti32x4	ymm9,YMMWORD[((-160))+r11]
-	vaesenc	ymm0,ymm0,ymm9
-	vaesenc	ymm1,ymm1,ymm9
-	vaesenc	ymm2,ymm2,ymm9
-	vaesenc	ymm3,ymm3,ymm9
-
-$L$aes128__func2:
-	vpshufb	ymm4,ymm4,ymm8
-	vpxord	ymm4,ymm4,ymm10
-	vpshufb	ymm5,ymm5,ymm8
-	vpshufb	ymm6,ymm6,ymm8
-
-	vaesenc	ymm0,ymm0,ymm15
-	vaesenc	ymm1,ymm1,ymm15
-	vaesenc	ymm2,ymm2,ymm15
-	vaesenc	ymm3,ymm3,ymm15
-
-	vpshufb	ymm7,ymm7,ymm8
-	vpclmulqdq	ymm10,ymm4,ymm27,0x00
-	vpclmulqdq	ymm24,ymm5,ymm28,0x00
-	vpclmulqdq	ymm25,ymm6,ymm29,0x00
-
-	vaesenc	ymm0,ymm0,ymm16
-	vaesenc	ymm1,ymm1,ymm16
-	vaesenc	ymm2,ymm2,ymm16
-	vaesenc	ymm3,ymm3,ymm16
-
-	vpxord	ymm10,ymm10,ymm24
-	vpclmulqdq	ymm26,ymm7,ymm30,0x00
-	vpternlogd	ymm10,ymm25,ymm26,0x96
-	vpclmulqdq	ymm24,ymm4,ymm27,0x01
-
-	vaesenc	ymm0,ymm0,ymm17
-	vaesenc	ymm1,ymm1,ymm17
-	vaesenc	ymm2,ymm2,ymm17
-	vaesenc	ymm3,ymm3,ymm17
-
-	vpclmulqdq	ymm25,ymm5,ymm28,0x01
-	vpclmulqdq	ymm26,ymm6,ymm29,0x01
-	vpternlogd	ymm24,ymm25,ymm26,0x96
-	vpclmulqdq	ymm25,ymm7,ymm30,0x01
-
-	vaesenc	ymm0,ymm0,ymm18
-	vaesenc	ymm1,ymm1,ymm18
-	vaesenc	ymm2,ymm2,ymm18
-	vaesenc	ymm3,ymm3,ymm18
-
-	vpclmulqdq	ymm26,ymm4,ymm27,0x10
-	vpternlogd	ymm24,ymm25,ymm26,0x96
-	vpclmulqdq	ymm25,ymm5,ymm28,0x10
-	vpclmulqdq	ymm26,ymm6,ymm29,0x10
-
-	vaesenc	ymm0,ymm0,ymm19
-	vaesenc	ymm1,ymm1,ymm19
-	vaesenc	ymm2,ymm2,ymm19
-	vaesenc	ymm3,ymm3,ymm19
-
-	vpternlogd	ymm24,ymm25,ymm26,0x96
-	vpclmulqdq	ymm26,ymm31,ymm10,0x01
-	vpclmulqdq	ymm25,ymm7,ymm30,0x10
-	vpxord	ymm24,ymm24,ymm25
-
-	vaesenc	ymm0,ymm0,ymm20
-	vaesenc	ymm1,ymm1,ymm20
-	vaesenc	ymm2,ymm2,ymm20
-	vaesenc	ymm3,ymm3,ymm20
-
-	vpshufd	ymm10,ymm10,0x4e
-	vpclmulqdq	ymm4,ymm4,ymm27,0x11
-	vpclmulqdq	ymm5,ymm5,ymm28,0x11
-	vpclmulqdq	ymm6,ymm6,ymm29,0x11
-
-	vaesenc	ymm0,ymm0,ymm21
-	vaesenc	ymm1,ymm1,ymm21
-	vaesenc	ymm2,ymm2,ymm21
-	vaesenc	ymm3,ymm3,ymm21
-
-	vpternlogd	ymm24,ymm10,ymm26,0x96
-	vpclmulqdq	ymm7,ymm7,ymm30,0x11
-	vpternlogd	ymm4,ymm5,ymm6,0x96
-	vpclmulqdq	ymm25,ymm31,ymm24,0x01
-
-	vaesenc	ymm0,ymm0,ymm22
-	vaesenc	ymm1,ymm1,ymm22
-	vaesenc	ymm2,ymm2,ymm22
-	vaesenc	ymm3,ymm3,ymm22
-
-	vpxord	ymm10,ymm4,ymm7
-	vpshufd	ymm24,ymm24,0x4e
-	vpternlogd	ymm10,ymm24,ymm25,0x96
-
-	vaesenc	ymm0,ymm0,ymm23
-	vaesenc	ymm1,ymm1,ymm23
-	vaesenc	ymm2,ymm2,ymm23
-	vaesenc	ymm3,ymm3,ymm23
-
-	vextracti32x4	xmm4,ymm10,1
-	vpxord	xmm10,xmm10,xmm4
-
-
-
-
-	vpxord	ymm4,ymm14,YMMWORD[rcx]
-	vpxord	ymm5,ymm14,YMMWORD[32+rcx]
-	vpxord	ymm6,ymm14,YMMWORD[64+rcx]
-	vpxord	ymm7,ymm14,YMMWORD[96+rcx]
-
-
-
-	vaesenclast	ymm4,ymm0,ymm4
-	vaesenclast	ymm5,ymm1,ymm5
-	vaesenclast	ymm6,ymm2,ymm6
-	vaesenclast	ymm7,ymm3,ymm7
-
-
-	vmovdqu8	YMMWORD[rdx],ymm4
-	vmovdqu8	YMMWORD[32+rdx],ymm5
-	vmovdqu8	YMMWORD[64+rdx],ymm6
-	vmovdqu8	YMMWORD[96+rdx],ymm7
-
-	sub	rcx,-4*32
-	sub	rdx,-4*32
-	add	r8,-4*32
-	cmp	r8,4*32-1
-	ja	NEAR $L$crypt_loop_4x__func2
-$L$crypt_loop_4x_done__func2:
-
-	test	r8,r8
-	jz	NEAR $L$done__func2
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-	mov	rax,r8
-	neg	rax
-	and	rax,-16
-	lea	rsi,[256+rax*1+rdi]
-	vpxor	xmm4,xmm4,xmm4
-	vpxor	xmm5,xmm5,xmm5
-	vpxor	xmm6,xmm6,xmm6
-
-	cmp	r8,32
-	jb	NEAR $L$partial_vec__func2
-
-$L$crypt_loop_1x__func2:
-
-
-
-	vpshufb	ymm0,ymm12,ymm8
-	vpaddd	ymm12,ymm12,ymm11
-	vpxord	ymm0,ymm0,ymm13
-	lea	rax,[16+r9]
-$L$vaesenc_loop_tail_full_vec__func2:
-	vbroadcasti32x4	ymm9,YMMWORD[rax]
-	vaesenc	ymm0,ymm0,ymm9
-	add	rax,16
-	cmp	r11,rax
-	jne	NEAR $L$vaesenc_loop_tail_full_vec__func2
-	vaesenclast	ymm0,ymm0,ymm14
-
-
-	vmovdqu8	ymm1,YMMWORD[rcx]
-	vpxord	ymm0,ymm0,ymm1
-	vmovdqu8	YMMWORD[rdx],ymm0
-
-
-	vmovdqu8	ymm30,YMMWORD[rsi]
-	vpshufb	ymm0,ymm1,ymm8
-	vpxord	ymm0,ymm0,ymm10
-	vpclmulqdq	ymm7,ymm0,ymm30,0x00
-	vpclmulqdq	ymm1,ymm0,ymm30,0x01
-	vpclmulqdq	ymm2,ymm0,ymm30,0x10
-	vpclmulqdq	ymm3,ymm0,ymm30,0x11
-	vpxord	ymm4,ymm4,ymm7
-	vpternlogd	ymm5,ymm1,ymm2,0x96
-	vpxord	ymm6,ymm6,ymm3
-
-	vpxor	xmm10,xmm10,xmm10
-
-	add	rsi,32
-	add	rcx,32
-	add	rdx,32
-	sub	r8,32
-	cmp	r8,32
-	jae	NEAR $L$crypt_loop_1x__func2
-
-	test	r8,r8
-	jz	NEAR $L$reduce__func2
-
-$L$partial_vec__func2:
-
-
-
-
-	mov	rax,-1
-	bzhi	rax,rax,r8
-	kmovd	k1,eax
-	add	r8,15
-	and	r8,-16
-	mov	rax,-1
-	bzhi	rax,rax,r8
-	kmovd	k2,eax
-
-
-
-	vpshufb	ymm0,ymm12,ymm8
-	vpxord	ymm0,ymm0,ymm13
-	lea	rax,[16+r9]
-$L$vaesenc_loop_tail_partialvec__func2:
-	vbroadcasti32x4	ymm9,YMMWORD[rax]
-	vaesenc	ymm0,ymm0,ymm9
-	add	rax,16
-	cmp	r11,rax
-	jne	NEAR $L$vaesenc_loop_tail_partialvec__func2
-	vaesenclast	ymm0,ymm0,ymm14
-
-
-	vmovdqu8	ymm1{k1}{z},[rcx]
-	vpxord	ymm0,ymm0,ymm1
-	vmovdqu8	YMMWORD[rdx]{k1},ymm0
-
-
-
-
-
-
-
-
-
-
-
-
-
-	vmovdqu8	ymm30{k2}{z},[rsi]
-
-	vpshufb	ymm0,ymm1,ymm8
-	vpxord	ymm0,ymm0,ymm10
-	vpclmulqdq	ymm7,ymm0,ymm30,0x00
-	vpclmulqdq	ymm1,ymm0,ymm30,0x01
-	vpclmulqdq	ymm2,ymm0,ymm30,0x10
-	vpclmulqdq	ymm3,ymm0,ymm30,0x11
-	vpxord	ymm4,ymm4,ymm7
-	vpternlogd	ymm5,ymm1,ymm2,0x96
-	vpxord	ymm6,ymm6,ymm3
-
-
-$L$reduce__func2:
-
-	vpclmulqdq	ymm0,ymm31,ymm4,0x01
-	vpshufd	ymm4,ymm4,0x4e
-	vpternlogd	ymm5,ymm4,ymm0,0x96
-	vpclmulqdq	ymm0,ymm31,ymm5,0x01
-	vpshufd	ymm5,ymm5,0x4e
-	vpternlogd	ymm6,ymm5,ymm0,0x96
-
-	vextracti32x4	xmm0,ymm6,1
-	vpxord	xmm10,xmm6,xmm0
-
-
-$L$done__func2:
-
-	vpshufb	xmm10,xmm10,xmm8
-	vmovdqu	XMMWORD[r12],xmm10
-
-	vzeroupper
-	movdqa	xmm6,XMMWORD[rsp]
-	movdqa	xmm7,XMMWORD[16+rsp]
-	movdqa	xmm8,XMMWORD[32+rsp]
-	movdqa	xmm9,XMMWORD[48+rsp]
-	movdqa	xmm10,XMMWORD[64+rsp]
-	movdqa	xmm11,XMMWORD[80+rsp]
-	movdqa	xmm12,XMMWORD[96+rsp]
-	movdqa	xmm13,XMMWORD[112+rsp]
-	movdqa	xmm14,XMMWORD[128+rsp]
-	movdqa	xmm15,XMMWORD[144+rsp]
-	add	rsp,160
-	pop	r12
-	pop	rdi
-	pop	rsi
-	ret
-$L$SEH_end_aes_gcm_dec_update_vaes_avx10_256_17:
-
-
 global	gcm_ghash_vpclmulqdq_avx10_512
 
 ALIGN	32
@@ -1364,7 +234,7 @@
 
 
 	cmp	r9,64
-	jb	NEAR $L$aad_blockbyblock__func2
+	jb	NEAR $L$aad_blockbyblock__func1
 
 
 
@@ -1375,7 +245,7 @@
 	vmovdqu8	zmm9,ZMMWORD[((256-64))+rdx]
 
 	cmp	r9,4*64-1
-	jbe	NEAR $L$aad_loop_1x__func2
+	jbe	NEAR $L$aad_loop_1x__func1
 
 
 	vmovdqu8	zmm6,ZMMWORD[((256-256))+rdx]
@@ -1383,7 +253,7 @@
 	vmovdqu8	zmm8,ZMMWORD[((256-128))+rdx]
 
 
-$L$aad_loop_4x__func2:
+$L$aad_loop_4x__func1:
 	vmovdqu8	zmm0,ZMMWORD[r8]
 	vmovdqu8	zmm1,ZMMWORD[64+r8]
 	vmovdqu8	zmm2,ZMMWORD[128+r8]
@@ -1432,12 +302,12 @@
 	sub	r8,-4*64
 	add	r9,-4*64
 	cmp	r9,4*64-1
-	ja	NEAR $L$aad_loop_4x__func2
+	ja	NEAR $L$aad_loop_4x__func1
 
 
 	cmp	r9,64
-	jb	NEAR $L$aad_large_done__func2
-$L$aad_loop_1x__func2:
+	jb	NEAR $L$aad_large_done__func1
+$L$aad_loop_1x__func1:
 	vmovdqu8	zmm0,ZMMWORD[r8]
 	vpshufb	zmm0,zmm0,zmm4
 	vpxord	zmm5,zmm5,zmm0
@@ -1462,19 +332,19 @@
 	add	r8,64
 	sub	r9,64
 	cmp	r9,64
-	jae	NEAR $L$aad_loop_1x__func2
+	jae	NEAR $L$aad_loop_1x__func1
 
-$L$aad_large_done__func2:
+$L$aad_large_done__func1:
 
 
 	vzeroupper
 
 
-$L$aad_blockbyblock__func2:
+$L$aad_blockbyblock__func1:
 	test	r9,r9
-	jz	NEAR $L$aad_done__func2
+	jz	NEAR $L$aad_done__func1
 	vmovdqu	xmm9,XMMWORD[((256-16))+rdx]
-$L$aad_loop_blockbyblock__func2:
+$L$aad_loop_blockbyblock__func1:
 	vmovdqu	xmm0,XMMWORD[r8]
 	vpshufb	xmm0,xmm0,xmm4
 	vpxor	xmm5,xmm5,xmm0
@@ -1492,9 +362,9 @@
 
 	add	r8,16
 	sub	r9,16
-	jnz	NEAR $L$aad_loop_blockbyblock__func2
+	jnz	NEAR $L$aad_loop_blockbyblock__func1
 
-$L$aad_done__func2:
+$L$aad_done__func1:
 
 	vpshufb	xmm5,xmm5,xmm4
 	vmovdqu	XMMWORD[rcx],xmm5
@@ -1588,7 +458,7 @@
 
 
 	cmp	r8,4*64-1
-	jbe	NEAR $L$crypt_loop_4x_done__func3
+	jbe	NEAR $L$crypt_loop_4x_done__func1
 
 
 	vmovdqu8	zmm27,ZMMWORD[((256-256))+rdi]
@@ -1615,7 +485,7 @@
 	vpxord	zmm3,zmm3,zmm13
 
 	lea	rax,[16+r9]
-$L$vaesenc_loop_first_4_vecs__func3:
+$L$vaesenc_loop_first_4_vecs__func1:
 	vbroadcasti32x4	zmm9,ZMMWORD[rax]
 	vaesenc	zmm0,zmm0,zmm9
 	vaesenc	zmm1,zmm1,zmm9
@@ -1624,7 +494,7 @@
 
 	add	rax,16
 	cmp	r11,rax
-	jne	NEAR $L$vaesenc_loop_first_4_vecs__func3
+	jne	NEAR $L$vaesenc_loop_first_4_vecs__func1
 
 
 
@@ -1650,7 +520,7 @@
 	sub	rdx,-4*64
 	add	r8,-4*64
 	cmp	r8,4*64-1
-	jbe	NEAR $L$ghash_last_ciphertext_4x__func3
+	jbe	NEAR $L$ghash_last_ciphertext_4x__func1
 	vbroadcasti32x4	zmm15,ZMMWORD[((-144))+r11]
 	vbroadcasti32x4	zmm16,ZMMWORD[((-128))+r11]
 	vbroadcasti32x4	zmm17,ZMMWORD[((-112))+r11]
@@ -1660,7 +530,7 @@
 	vbroadcasti32x4	zmm21,ZMMWORD[((-48))+r11]
 	vbroadcasti32x4	zmm22,ZMMWORD[((-32))+r11]
 	vbroadcasti32x4	zmm23,ZMMWORD[((-16))+r11]
-$L$crypt_loop_4x__func3:
+$L$crypt_loop_4x__func1:
 
 
 
@@ -1680,8 +550,8 @@
 	vpxord	zmm3,zmm3,zmm13
 
 	cmp	r10d,24
-	jl	NEAR $L$aes128__func3
-	je	NEAR $L$aes192__func3
+	jl	NEAR $L$aes128__func1
+	je	NEAR $L$aes192__func1
 
 	vbroadcasti32x4	zmm9,ZMMWORD[((-208))+r11]
 	vaesenc	zmm0,zmm0,zmm9
@@ -1695,7 +565,7 @@
 	vaesenc	zmm2,zmm2,zmm9
 	vaesenc	zmm3,zmm3,zmm9
 
-$L$aes192__func3:
+$L$aes192__func1:
 	vbroadcasti32x4	zmm9,ZMMWORD[((-176))+r11]
 	vaesenc	zmm0,zmm0,zmm9
 	vaesenc	zmm1,zmm1,zmm9
@@ -1708,7 +578,7 @@
 	vaesenc	zmm2,zmm2,zmm9
 	vaesenc	zmm3,zmm3,zmm9
 
-$L$aes128__func3:
+$L$aes128__func1:
 	vpshufb	zmm4,zmm4,zmm8
 	vpxord	zmm4,zmm4,zmm10
 	vpshufb	zmm5,zmm5,zmm8
@@ -1829,8 +699,8 @@
 	sub	rdx,-4*64
 	add	r8,-4*64
 	cmp	r8,4*64-1
-	ja	NEAR $L$crypt_loop_4x__func3
-$L$ghash_last_ciphertext_4x__func3:
+	ja	NEAR $L$crypt_loop_4x__func1
+$L$ghash_last_ciphertext_4x__func1:
 	vpshufb	zmm4,zmm4,zmm8
 	vpxord	zmm4,zmm4,zmm10
 	vpshufb	zmm5,zmm5,zmm8
@@ -1872,10 +742,10 @@
 	vpxord	xmm10,xmm10,xmm4
 	vpternlogd	xmm10,xmm6,xmm5,0x96
 
-$L$crypt_loop_4x_done__func3:
+$L$crypt_loop_4x_done__func1:
 
 	test	r8,r8
-	jz	NEAR $L$done__func3
+	jz	NEAR $L$done__func1
 
 
 
@@ -1905,9 +775,9 @@
 	vpxor	xmm6,xmm6,xmm6
 
 	cmp	r8,64
-	jb	NEAR $L$partial_vec__func3
+	jb	NEAR $L$partial_vec__func1
 
-$L$crypt_loop_1x__func3:
+$L$crypt_loop_1x__func1:
 
 
 
@@ -1915,12 +785,12 @@
 	vpaddd	zmm12,zmm12,zmm11
 	vpxord	zmm0,zmm0,zmm13
 	lea	rax,[16+r9]
-$L$vaesenc_loop_tail_full_vec__func3:
+$L$vaesenc_loop_tail_full_vec__func1:
 	vbroadcasti32x4	zmm9,ZMMWORD[rax]
 	vaesenc	zmm0,zmm0,zmm9
 	add	rax,16
 	cmp	r11,rax
-	jne	NEAR $L$vaesenc_loop_tail_full_vec__func3
+	jne	NEAR $L$vaesenc_loop_tail_full_vec__func1
 	vaesenclast	zmm0,zmm0,zmm14
 
 
@@ -1947,12 +817,12 @@
 	add	rdx,64
 	sub	r8,64
 	cmp	r8,64
-	jae	NEAR $L$crypt_loop_1x__func3
+	jae	NEAR $L$crypt_loop_1x__func1
 
 	test	r8,r8
-	jz	NEAR $L$reduce__func3
+	jz	NEAR $L$reduce__func1
 
-$L$partial_vec__func3:
+$L$partial_vec__func1:
 
 
 
@@ -1971,12 +841,12 @@
 	vpshufb	zmm0,zmm12,zmm8
 	vpxord	zmm0,zmm0,zmm13
 	lea	rax,[16+r9]
-$L$vaesenc_loop_tail_partialvec__func3:
+$L$vaesenc_loop_tail_partialvec__func1:
 	vbroadcasti32x4	zmm9,ZMMWORD[rax]
 	vaesenc	zmm0,zmm0,zmm9
 	add	rax,16
 	cmp	r11,rax
-	jne	NEAR $L$vaesenc_loop_tail_partialvec__func3
+	jne	NEAR $L$vaesenc_loop_tail_partialvec__func1
 	vaesenclast	zmm0,zmm0,zmm14
 
 
@@ -2009,7 +879,7 @@
 	vpxord	zmm6,zmm6,zmm3
 
 
-$L$reduce__func3:
+$L$reduce__func1:
 
 	vpclmulqdq	zmm0,zmm31,zmm4,0x01
 	vpshufd	zmm4,zmm4,0x4e
@@ -2025,7 +895,7 @@
 	vpternlogd	xmm10,xmm2,xmm1,0x96
 
 
-$L$done__func3:
+$L$done__func1:
 
 	vpshufb	xmm10,xmm10,xmm8
 	vmovdqu	XMMWORD[r12],xmm10
@@ -2122,7 +992,7 @@
 
 
 	cmp	r8,4*64-1
-	jbe	NEAR $L$crypt_loop_4x_done__func4
+	jbe	NEAR $L$crypt_loop_4x_done__func2
 
 
 	vmovdqu8	zmm27,ZMMWORD[((256-256))+rdi]
@@ -2138,7 +1008,7 @@
 	vbroadcasti32x4	zmm21,ZMMWORD[((-48))+r11]
 	vbroadcasti32x4	zmm22,ZMMWORD[((-32))+r11]
 	vbroadcasti32x4	zmm23,ZMMWORD[((-16))+r11]
-$L$crypt_loop_4x__func4:
+$L$crypt_loop_4x__func2:
 	vmovdqu8	zmm4,ZMMWORD[rcx]
 	vmovdqu8	zmm5,ZMMWORD[64+rcx]
 	vmovdqu8	zmm6,ZMMWORD[128+rcx]
@@ -2162,8 +1032,8 @@
 	vpxord	zmm3,zmm3,zmm13
 
 	cmp	r10d,24
-	jl	NEAR $L$aes128__func4
-	je	NEAR $L$aes192__func4
+	jl	NEAR $L$aes128__func2
+	je	NEAR $L$aes192__func2
 
 	vbroadcasti32x4	zmm9,ZMMWORD[((-208))+r11]
 	vaesenc	zmm0,zmm0,zmm9
@@ -2177,7 +1047,7 @@
 	vaesenc	zmm2,zmm2,zmm9
 	vaesenc	zmm3,zmm3,zmm9
 
-$L$aes192__func4:
+$L$aes192__func2:
 	vbroadcasti32x4	zmm9,ZMMWORD[((-176))+r11]
 	vaesenc	zmm0,zmm0,zmm9
 	vaesenc	zmm1,zmm1,zmm9
@@ -2190,7 +1060,7 @@
 	vaesenc	zmm2,zmm2,zmm9
 	vaesenc	zmm3,zmm3,zmm9
 
-$L$aes128__func4:
+$L$aes128__func2:
 	vpshufb	zmm4,zmm4,zmm8
 	vpxord	zmm4,zmm4,zmm10
 	vpshufb	zmm5,zmm5,zmm8
@@ -2311,11 +1181,11 @@
 	sub	rdx,-4*64
 	add	r8,-4*64
 	cmp	r8,4*64-1
-	ja	NEAR $L$crypt_loop_4x__func4
-$L$crypt_loop_4x_done__func4:
+	ja	NEAR $L$crypt_loop_4x__func2
+$L$crypt_loop_4x_done__func2:
 
 	test	r8,r8
-	jz	NEAR $L$done__func4
+	jz	NEAR $L$done__func2
 
 
 
@@ -2345,9 +1215,9 @@
 	vpxor	xmm6,xmm6,xmm6
 
 	cmp	r8,64
-	jb	NEAR $L$partial_vec__func4
+	jb	NEAR $L$partial_vec__func2
 
-$L$crypt_loop_1x__func4:
+$L$crypt_loop_1x__func2:
 
 
 
@@ -2355,12 +1225,12 @@
 	vpaddd	zmm12,zmm12,zmm11
 	vpxord	zmm0,zmm0,zmm13
 	lea	rax,[16+r9]
-$L$vaesenc_loop_tail_full_vec__func4:
+$L$vaesenc_loop_tail_full_vec__func2:
 	vbroadcasti32x4	zmm9,ZMMWORD[rax]
 	vaesenc	zmm0,zmm0,zmm9
 	add	rax,16
 	cmp	r11,rax
-	jne	NEAR $L$vaesenc_loop_tail_full_vec__func4
+	jne	NEAR $L$vaesenc_loop_tail_full_vec__func2
 	vaesenclast	zmm0,zmm0,zmm14
 
 
@@ -2387,12 +1257,12 @@
 	add	rdx,64
 	sub	r8,64
 	cmp	r8,64
-	jae	NEAR $L$crypt_loop_1x__func4
+	jae	NEAR $L$crypt_loop_1x__func2
 
 	test	r8,r8
-	jz	NEAR $L$reduce__func4
+	jz	NEAR $L$reduce__func2
 
-$L$partial_vec__func4:
+$L$partial_vec__func2:
 
 
 
@@ -2411,12 +1281,12 @@
 	vpshufb	zmm0,zmm12,zmm8
 	vpxord	zmm0,zmm0,zmm13
 	lea	rax,[16+r9]
-$L$vaesenc_loop_tail_partialvec__func4:
+$L$vaesenc_loop_tail_partialvec__func2:
 	vbroadcasti32x4	zmm9,ZMMWORD[rax]
 	vaesenc	zmm0,zmm0,zmm9
 	add	rax,16
 	cmp	r11,rax
-	jne	NEAR $L$vaesenc_loop_tail_partialvec__func4
+	jne	NEAR $L$vaesenc_loop_tail_partialvec__func2
 	vaesenclast	zmm0,zmm0,zmm14
 
 
@@ -2449,7 +1319,7 @@
 	vpxord	zmm6,zmm6,zmm3
 
 
-$L$reduce__func4:
+$L$reduce__func2:
 
 	vpclmulqdq	zmm0,zmm31,zmm4,0x01
 	vpshufd	zmm4,zmm4,0x4e
@@ -2465,7 +1335,7 @@
 	vpternlogd	xmm10,xmm2,xmm1,0x96
 
 
-$L$done__func4:
+$L$done__func2:
 
 	vpshufb	xmm10,xmm10,xmm8
 	vmovdqu	XMMWORD[r12],xmm10
@@ -2495,18 +1365,6 @@
 	DD	$L$SEH_end_gcm_gmult_vpclmulqdq_avx10_5 wrt ..imagebase
 	DD	$L$SEH_info_gcm_gmult_vpclmulqdq_avx10_0 wrt ..imagebase
 
-	DD	$L$SEH_begin_gcm_ghash_vpclmulqdq_avx10_256_1 wrt ..imagebase
-	DD	$L$SEH_end_gcm_ghash_vpclmulqdq_avx10_256_12 wrt ..imagebase
-	DD	$L$SEH_info_gcm_ghash_vpclmulqdq_avx10_256_0 wrt ..imagebase
-
-	DD	$L$SEH_begin_aes_gcm_enc_update_vaes_avx10_256_1 wrt ..imagebase
-	DD	$L$SEH_end_aes_gcm_enc_update_vaes_avx10_256_17 wrt ..imagebase
-	DD	$L$SEH_info_aes_gcm_enc_update_vaes_avx10_256_0 wrt ..imagebase
-
-	DD	$L$SEH_begin_aes_gcm_dec_update_vaes_avx10_256_1 wrt ..imagebase
-	DD	$L$SEH_end_aes_gcm_dec_update_vaes_avx10_256_17 wrt ..imagebase
-	DD	$L$SEH_info_aes_gcm_dec_update_vaes_avx10_256_0 wrt ..imagebase
-
 	DD	$L$SEH_begin_gcm_ghash_vpclmulqdq_avx10_512_1 wrt ..imagebase
 	DD	$L$SEH_end_gcm_ghash_vpclmulqdq_avx10_512_12 wrt ..imagebase
 	DD	$L$SEH_info_gcm_ghash_vpclmulqdq_avx10_512_0 wrt ..imagebase
@@ -2534,131 +1392,6 @@
 	DB	34
 
 	DW	0
-$L$SEH_info_gcm_ghash_vpclmulqdq_avx10_256_0:
-	DB	1
-	DB	$L$SEH_endprologue_gcm_ghash_vpclmulqdq_avx10_256_11-$L$SEH_begin_gcm_ghash_vpclmulqdq_avx10_256_1
-	DB	18
-	DB	0
-	DB	$L$SEH_prologue_gcm_ghash_vpclmulqdq_avx10_256_10-$L$SEH_begin_gcm_ghash_vpclmulqdq_avx10_256_1
-	DB	216
-	DW	7
-	DB	$L$SEH_prologue_gcm_ghash_vpclmulqdq_avx10_256_9-$L$SEH_begin_gcm_ghash_vpclmulqdq_avx10_256_1
-	DB	200
-	DW	6
-	DB	$L$SEH_prologue_gcm_ghash_vpclmulqdq_avx10_256_8-$L$SEH_begin_gcm_ghash_vpclmulqdq_avx10_256_1
-	DB	184
-	DW	5
-	DB	$L$SEH_prologue_gcm_ghash_vpclmulqdq_avx10_256_7-$L$SEH_begin_gcm_ghash_vpclmulqdq_avx10_256_1
-	DB	168
-	DW	4
-	DB	$L$SEH_prologue_gcm_ghash_vpclmulqdq_avx10_256_6-$L$SEH_begin_gcm_ghash_vpclmulqdq_avx10_256_1
-	DB	152
-	DW	3
-	DB	$L$SEH_prologue_gcm_ghash_vpclmulqdq_avx10_256_5-$L$SEH_begin_gcm_ghash_vpclmulqdq_avx10_256_1
-	DB	136
-	DW	2
-	DB	$L$SEH_prologue_gcm_ghash_vpclmulqdq_avx10_256_4-$L$SEH_begin_gcm_ghash_vpclmulqdq_avx10_256_1
-	DB	120
-	DW	1
-	DB	$L$SEH_prologue_gcm_ghash_vpclmulqdq_avx10_256_3-$L$SEH_begin_gcm_ghash_vpclmulqdq_avx10_256_1
-	DB	104
-	DW	0
-	DB	$L$SEH_prologue_gcm_ghash_vpclmulqdq_avx10_256_2-$L$SEH_begin_gcm_ghash_vpclmulqdq_avx10_256_1
-	DB	1
-	DW	17
-
-$L$SEH_info_aes_gcm_enc_update_vaes_avx10_256_0:
-	DB	1
-	DB	$L$SEH_endprologue_aes_gcm_enc_update_vaes_avx10_256_16-$L$SEH_begin_aes_gcm_enc_update_vaes_avx10_256_1
-	DB	25
-	DB	0
-	DB	$L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_256_15-$L$SEH_begin_aes_gcm_enc_update_vaes_avx10_256_1
-	DB	248
-	DW	9
-	DB	$L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_256_14-$L$SEH_begin_aes_gcm_enc_update_vaes_avx10_256_1
-	DB	232
-	DW	8
-	DB	$L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_256_13-$L$SEH_begin_aes_gcm_enc_update_vaes_avx10_256_1
-	DB	216
-	DW	7
-	DB	$L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_256_12-$L$SEH_begin_aes_gcm_enc_update_vaes_avx10_256_1
-	DB	200
-	DW	6
-	DB	$L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_256_11-$L$SEH_begin_aes_gcm_enc_update_vaes_avx10_256_1
-	DB	184
-	DW	5
-	DB	$L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_256_10-$L$SEH_begin_aes_gcm_enc_update_vaes_avx10_256_1
-	DB	168
-	DW	4
-	DB	$L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_256_9-$L$SEH_begin_aes_gcm_enc_update_vaes_avx10_256_1
-	DB	152
-	DW	3
-	DB	$L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_256_8-$L$SEH_begin_aes_gcm_enc_update_vaes_avx10_256_1
-	DB	136
-	DW	2
-	DB	$L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_256_7-$L$SEH_begin_aes_gcm_enc_update_vaes_avx10_256_1
-	DB	120
-	DW	1
-	DB	$L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_256_6-$L$SEH_begin_aes_gcm_enc_update_vaes_avx10_256_1
-	DB	104
-	DW	0
-	DB	$L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_256_5-$L$SEH_begin_aes_gcm_enc_update_vaes_avx10_256_1
-	DB	1
-	DW	20
-	DB	$L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_256_4-$L$SEH_begin_aes_gcm_enc_update_vaes_avx10_256_1
-	DB	192
-	DB	$L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_256_3-$L$SEH_begin_aes_gcm_enc_update_vaes_avx10_256_1
-	DB	112
-	DB	$L$SEH_prologue_aes_gcm_enc_update_vaes_avx10_256_2-$L$SEH_begin_aes_gcm_enc_update_vaes_avx10_256_1
-	DB	96
-
-	DW	0
-$L$SEH_info_aes_gcm_dec_update_vaes_avx10_256_0:
-	DB	1
-	DB	$L$SEH_endprologue_aes_gcm_dec_update_vaes_avx10_256_16-$L$SEH_begin_aes_gcm_dec_update_vaes_avx10_256_1
-	DB	25
-	DB	0
-	DB	$L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_256_15-$L$SEH_begin_aes_gcm_dec_update_vaes_avx10_256_1
-	DB	248
-	DW	9
-	DB	$L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_256_14-$L$SEH_begin_aes_gcm_dec_update_vaes_avx10_256_1
-	DB	232
-	DW	8
-	DB	$L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_256_13-$L$SEH_begin_aes_gcm_dec_update_vaes_avx10_256_1
-	DB	216
-	DW	7
-	DB	$L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_256_12-$L$SEH_begin_aes_gcm_dec_update_vaes_avx10_256_1
-	DB	200
-	DW	6
-	DB	$L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_256_11-$L$SEH_begin_aes_gcm_dec_update_vaes_avx10_256_1
-	DB	184
-	DW	5
-	DB	$L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_256_10-$L$SEH_begin_aes_gcm_dec_update_vaes_avx10_256_1
-	DB	168
-	DW	4
-	DB	$L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_256_9-$L$SEH_begin_aes_gcm_dec_update_vaes_avx10_256_1
-	DB	152
-	DW	3
-	DB	$L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_256_8-$L$SEH_begin_aes_gcm_dec_update_vaes_avx10_256_1
-	DB	136
-	DW	2
-	DB	$L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_256_7-$L$SEH_begin_aes_gcm_dec_update_vaes_avx10_256_1
-	DB	120
-	DW	1
-	DB	$L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_256_6-$L$SEH_begin_aes_gcm_dec_update_vaes_avx10_256_1
-	DB	104
-	DW	0
-	DB	$L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_256_5-$L$SEH_begin_aes_gcm_dec_update_vaes_avx10_256_1
-	DB	1
-	DW	20
-	DB	$L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_256_4-$L$SEH_begin_aes_gcm_dec_update_vaes_avx10_256_1
-	DB	192
-	DB	$L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_256_3-$L$SEH_begin_aes_gcm_dec_update_vaes_avx10_256_1
-	DB	112
-	DB	$L$SEH_prologue_aes_gcm_dec_update_vaes_avx10_256_2-$L$SEH_begin_aes_gcm_dec_update_vaes_avx10_256_1
-	DB	96
-
-	DW	0
 $L$SEH_info_gcm_ghash_vpclmulqdq_avx10_512_0:
 	DB	1
 	DB	$L$SEH_endprologue_gcm_ghash_vpclmulqdq_avx10_512_11-$L$SEH_begin_gcm_ghash_vpclmulqdq_avx10_512_1