|  | // This file is generated from a similarly-named Perl script in the BoringSSL | 
|  | // source tree. Do not edit by hand. | 
|  |  | 
|  | #include <openssl/asm_base.h> | 
|  |  | 
|  | #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__) | 
|  | .section	.rodata | 
|  | .align	64 | 
|  |  | 
|  |  | 
|  | .Lbswap_mask: | 
|  | .quad	0x08090a0b0c0d0e0f, 0x0001020304050607 | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  | .Lgfpoly: | 
|  | .quad	1, 0xc200000000000000 | 
|  |  | 
|  |  | 
|  | .Lgfpoly_and_internal_carrybit: | 
|  | .quad	1, 0xc200000000000001 | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  | .Lctr_pattern: | 
|  | .quad	0, 0 | 
|  | .quad	1, 0 | 
|  | .Linc_2blocks: | 
|  | .quad	2, 0 | 
|  | .quad	3, 0 | 
|  | .Linc_4blocks: | 
|  | .quad	4, 0 | 
|  |  | 
|  | .text | 
|  | .globl	gcm_gmult_vpclmulqdq_avx10 | 
|  | .hidden gcm_gmult_vpclmulqdq_avx10 | 
|  | .type	gcm_gmult_vpclmulqdq_avx10,@function | 
|  | .align	32 | 
|  | gcm_gmult_vpclmulqdq_avx10: | 
|  | .cfi_startproc | 
|  |  | 
|  | _CET_ENDBR | 
|  |  | 
|  |  | 
|  |  | 
|  | vmovdqu	(%rdi),%xmm0 | 
|  | vmovdqu	.Lbswap_mask(%rip),%xmm1 | 
|  | vmovdqu	256-16(%rsi),%xmm2 | 
|  | vmovdqu	.Lgfpoly(%rip),%xmm3 | 
|  | vpshufb	%xmm1,%xmm0,%xmm0 | 
|  |  | 
|  | vpclmulqdq	$0x00,%xmm2,%xmm0,%xmm4 | 
|  | vpclmulqdq	$0x01,%xmm2,%xmm0,%xmm5 | 
|  | vpclmulqdq	$0x10,%xmm2,%xmm0,%xmm6 | 
|  | vpxord	%xmm6,%xmm5,%xmm5 | 
|  | vpclmulqdq	$0x01,%xmm4,%xmm3,%xmm6 | 
|  | vpshufd	$0x4e,%xmm4,%xmm4 | 
|  | vpternlogd	$0x96,%xmm6,%xmm4,%xmm5 | 
|  | vpclmulqdq	$0x11,%xmm2,%xmm0,%xmm0 | 
|  | vpclmulqdq	$0x01,%xmm5,%xmm3,%xmm4 | 
|  | vpshufd	$0x4e,%xmm5,%xmm5 | 
|  | vpternlogd	$0x96,%xmm4,%xmm5,%xmm0 | 
|  |  | 
|  |  | 
|  | vpshufb	%xmm1,%xmm0,%xmm0 | 
|  | vmovdqu	%xmm0,(%rdi) | 
|  | ret | 
|  |  | 
|  | .cfi_endproc | 
|  | .size	gcm_gmult_vpclmulqdq_avx10, . - gcm_gmult_vpclmulqdq_avx10 | 
|  | .globl	gcm_init_vpclmulqdq_avx10_512 | 
|  | .hidden gcm_init_vpclmulqdq_avx10_512 | 
|  | .type	gcm_init_vpclmulqdq_avx10_512,@function | 
|  | .align	32 | 
|  | gcm_init_vpclmulqdq_avx10_512: | 
|  | .cfi_startproc | 
|  |  | 
|  | _CET_ENDBR | 
|  |  | 
|  | leaq	256-64(%rdi),%r8 | 
|  |  | 
|  |  | 
|  |  | 
|  | vpshufd	$0x4e,(%rsi),%xmm3 | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  | vpshufd	$0xd3,%xmm3,%xmm0 | 
|  | vpsrad	$31,%xmm0,%xmm0 | 
|  | vpaddq	%xmm3,%xmm3,%xmm3 | 
|  |  | 
|  | vpternlogd	$0x78,.Lgfpoly_and_internal_carrybit(%rip),%xmm0,%xmm3 | 
|  |  | 
|  |  | 
|  | vbroadcasti32x4	.Lgfpoly(%rip),%zmm5 | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  | vpclmulqdq	$0x00,%xmm3,%xmm3,%xmm0 | 
|  | vpclmulqdq	$0x01,%xmm3,%xmm3,%xmm1 | 
|  | vpclmulqdq	$0x10,%xmm3,%xmm3,%xmm2 | 
|  | vpxord	%xmm2,%xmm1,%xmm1 | 
|  | vpclmulqdq	$0x01,%xmm0,%xmm5,%xmm2 | 
|  | vpshufd	$0x4e,%xmm0,%xmm0 | 
|  | vpternlogd	$0x96,%xmm2,%xmm0,%xmm1 | 
|  | vpclmulqdq	$0x11,%xmm3,%xmm3,%xmm4 | 
|  | vpclmulqdq	$0x01,%xmm1,%xmm5,%xmm0 | 
|  | vpshufd	$0x4e,%xmm1,%xmm1 | 
|  | vpternlogd	$0x96,%xmm0,%xmm1,%xmm4 | 
|  |  | 
|  |  | 
|  |  | 
|  | vinserti128	$1,%xmm3,%ymm4,%ymm3 | 
|  | vinserti128	$1,%xmm4,%ymm4,%ymm4 | 
|  | vpclmulqdq	$0x00,%ymm4,%ymm3,%ymm0 | 
|  | vpclmulqdq	$0x01,%ymm4,%ymm3,%ymm1 | 
|  | vpclmulqdq	$0x10,%ymm4,%ymm3,%ymm2 | 
|  | vpxord	%ymm2,%ymm1,%ymm1 | 
|  | vpclmulqdq	$0x01,%ymm0,%ymm5,%ymm2 | 
|  | vpshufd	$0x4e,%ymm0,%ymm0 | 
|  | vpternlogd	$0x96,%ymm2,%ymm0,%ymm1 | 
|  | vpclmulqdq	$0x11,%ymm4,%ymm3,%ymm4 | 
|  | vpclmulqdq	$0x01,%ymm1,%ymm5,%ymm0 | 
|  | vpshufd	$0x4e,%ymm1,%ymm1 | 
|  | vpternlogd	$0x96,%ymm0,%ymm1,%ymm4 | 
|  |  | 
|  | vinserti64x4	$1,%ymm3,%zmm4,%zmm3 | 
|  | vshufi64x2	$0,%zmm4,%zmm4,%zmm4 | 
|  |  | 
|  | vmovdqu8	%zmm3,(%r8) | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  | movl	$3,%eax | 
|  | .Lprecompute_next__func1: | 
|  | subq	$64,%r8 | 
|  | vpclmulqdq	$0x00,%zmm4,%zmm3,%zmm0 | 
|  | vpclmulqdq	$0x01,%zmm4,%zmm3,%zmm1 | 
|  | vpclmulqdq	$0x10,%zmm4,%zmm3,%zmm2 | 
|  | vpxord	%zmm2,%zmm1,%zmm1 | 
|  | vpclmulqdq	$0x01,%zmm0,%zmm5,%zmm2 | 
|  | vpshufd	$0x4e,%zmm0,%zmm0 | 
|  | vpternlogd	$0x96,%zmm2,%zmm0,%zmm1 | 
|  | vpclmulqdq	$0x11,%zmm4,%zmm3,%zmm3 | 
|  | vpclmulqdq	$0x01,%zmm1,%zmm5,%zmm0 | 
|  | vpshufd	$0x4e,%zmm1,%zmm1 | 
|  | vpternlogd	$0x96,%zmm0,%zmm1,%zmm3 | 
|  |  | 
|  | vmovdqu8	%zmm3,(%r8) | 
|  | decl	%eax | 
|  | jnz	.Lprecompute_next__func1 | 
|  |  | 
|  | vzeroupper | 
|  | ret | 
|  |  | 
|  | .cfi_endproc | 
|  | .size	gcm_init_vpclmulqdq_avx10_512, . - gcm_init_vpclmulqdq_avx10_512 | 
|  | .globl	gcm_ghash_vpclmulqdq_avx10_512 | 
|  | .hidden gcm_ghash_vpclmulqdq_avx10_512 | 
|  | .type	gcm_ghash_vpclmulqdq_avx10_512,@function | 
|  | .align	32 | 
|  | gcm_ghash_vpclmulqdq_avx10_512: | 
|  | .cfi_startproc | 
|  |  | 
|  | _CET_ENDBR | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  | vmovdqu	.Lbswap_mask(%rip),%xmm4 | 
|  | vmovdqu	.Lgfpoly(%rip),%xmm10 | 
|  |  | 
|  |  | 
|  | vmovdqu	(%rdi),%xmm5 | 
|  | vpshufb	%xmm4,%xmm5,%xmm5 | 
|  |  | 
|  |  | 
|  | cmpq	$64,%rcx | 
|  | jb	.Laad_blockbyblock__func1 | 
|  |  | 
|  |  | 
|  |  | 
|  | vshufi64x2	$0,%zmm4,%zmm4,%zmm4 | 
|  | vshufi64x2	$0,%zmm10,%zmm10,%zmm10 | 
|  |  | 
|  |  | 
|  | vmovdqu8	256-64(%rsi),%zmm9 | 
|  |  | 
|  | cmpq	$256-1,%rcx | 
|  | jbe	.Laad_loop_1x__func1 | 
|  |  | 
|  |  | 
|  | vmovdqu8	256-256(%rsi),%zmm6 | 
|  | vmovdqu8	256-192(%rsi),%zmm7 | 
|  | vmovdqu8	256-128(%rsi),%zmm8 | 
|  |  | 
|  |  | 
|  | .Laad_loop_4x__func1: | 
|  | vmovdqu8	0(%rdx),%zmm0 | 
|  | vmovdqu8	64(%rdx),%zmm1 | 
|  | vmovdqu8	128(%rdx),%zmm2 | 
|  | vmovdqu8	192(%rdx),%zmm3 | 
|  | vpshufb	%zmm4,%zmm0,%zmm0 | 
|  | vpxord	%zmm5,%zmm0,%zmm0 | 
|  | vpshufb	%zmm4,%zmm1,%zmm1 | 
|  | vpshufb	%zmm4,%zmm2,%zmm2 | 
|  | vpshufb	%zmm4,%zmm3,%zmm3 | 
|  | vpclmulqdq	$0x00,%zmm6,%zmm0,%zmm5 | 
|  | vpclmulqdq	$0x00,%zmm7,%zmm1,%zmm11 | 
|  | vpclmulqdq	$0x00,%zmm8,%zmm2,%zmm12 | 
|  | vpxord	%zmm11,%zmm5,%zmm5 | 
|  | vpclmulqdq	$0x00,%zmm9,%zmm3,%zmm13 | 
|  | vpternlogd	$0x96,%zmm13,%zmm12,%zmm5 | 
|  | vpclmulqdq	$0x01,%zmm6,%zmm0,%zmm11 | 
|  | vpclmulqdq	$0x01,%zmm7,%zmm1,%zmm12 | 
|  | vpclmulqdq	$0x01,%zmm8,%zmm2,%zmm13 | 
|  | vpternlogd	$0x96,%zmm13,%zmm12,%zmm11 | 
|  | vpclmulqdq	$0x01,%zmm9,%zmm3,%zmm12 | 
|  | vpclmulqdq	$0x10,%zmm6,%zmm0,%zmm13 | 
|  | vpternlogd	$0x96,%zmm13,%zmm12,%zmm11 | 
|  | vpclmulqdq	$0x10,%zmm7,%zmm1,%zmm12 | 
|  | vpclmulqdq	$0x10,%zmm8,%zmm2,%zmm13 | 
|  | vpternlogd	$0x96,%zmm13,%zmm12,%zmm11 | 
|  | vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm13 | 
|  | vpclmulqdq	$0x10,%zmm9,%zmm3,%zmm12 | 
|  | vpxord	%zmm12,%zmm11,%zmm11 | 
|  | vpshufd	$0x4e,%zmm5,%zmm5 | 
|  | vpclmulqdq	$0x11,%zmm6,%zmm0,%zmm0 | 
|  | vpclmulqdq	$0x11,%zmm7,%zmm1,%zmm1 | 
|  | vpclmulqdq	$0x11,%zmm8,%zmm2,%zmm2 | 
|  | vpternlogd	$0x96,%zmm13,%zmm5,%zmm11 | 
|  | vpclmulqdq	$0x11,%zmm9,%zmm3,%zmm3 | 
|  | vpternlogd	$0x96,%zmm2,%zmm1,%zmm0 | 
|  | vpclmulqdq	$0x01,%zmm11,%zmm10,%zmm12 | 
|  | vpxord	%zmm3,%zmm0,%zmm5 | 
|  | vpshufd	$0x4e,%zmm11,%zmm11 | 
|  | vpternlogd	$0x96,%zmm12,%zmm11,%zmm5 | 
|  | vextracti32x4	$1,%zmm5,%xmm0 | 
|  | vextracti32x4	$2,%zmm5,%xmm1 | 
|  | vextracti32x4	$3,%zmm5,%xmm2 | 
|  | vpxord	%xmm0,%xmm5,%xmm5 | 
|  | vpternlogd	$0x96,%xmm1,%xmm2,%xmm5 | 
|  |  | 
|  | subq	$-256,%rdx | 
|  | addq	$-256,%rcx | 
|  | cmpq	$256-1,%rcx | 
|  | ja	.Laad_loop_4x__func1 | 
|  |  | 
|  |  | 
|  | cmpq	$64,%rcx | 
|  | jb	.Laad_large_done__func1 | 
|  | .Laad_loop_1x__func1: | 
|  | vmovdqu8	(%rdx),%zmm0 | 
|  | vpshufb	%zmm4,%zmm0,%zmm0 | 
|  | vpxord	%zmm0,%zmm5,%zmm5 | 
|  | vpclmulqdq	$0x00,%zmm9,%zmm5,%zmm0 | 
|  | vpclmulqdq	$0x01,%zmm9,%zmm5,%zmm1 | 
|  | vpclmulqdq	$0x10,%zmm9,%zmm5,%zmm2 | 
|  | vpxord	%zmm2,%zmm1,%zmm1 | 
|  | vpclmulqdq	$0x01,%zmm0,%zmm10,%zmm2 | 
|  | vpshufd	$0x4e,%zmm0,%zmm0 | 
|  | vpternlogd	$0x96,%zmm2,%zmm0,%zmm1 | 
|  | vpclmulqdq	$0x11,%zmm9,%zmm5,%zmm5 | 
|  | vpclmulqdq	$0x01,%zmm1,%zmm10,%zmm0 | 
|  | vpshufd	$0x4e,%zmm1,%zmm1 | 
|  | vpternlogd	$0x96,%zmm0,%zmm1,%zmm5 | 
|  |  | 
|  | vextracti32x4	$1,%zmm5,%xmm0 | 
|  | vextracti32x4	$2,%zmm5,%xmm1 | 
|  | vextracti32x4	$3,%zmm5,%xmm2 | 
|  | vpxord	%xmm0,%xmm5,%xmm5 | 
|  | vpternlogd	$0x96,%xmm1,%xmm2,%xmm5 | 
|  |  | 
|  | addq	$64,%rdx | 
|  | subq	$64,%rcx | 
|  | cmpq	$64,%rcx | 
|  | jae	.Laad_loop_1x__func1 | 
|  |  | 
|  | .Laad_large_done__func1: | 
|  |  | 
|  |  | 
|  | vzeroupper | 
|  |  | 
|  |  | 
|  | .Laad_blockbyblock__func1: | 
|  | testq	%rcx,%rcx | 
|  | jz	.Laad_done__func1 | 
|  | vmovdqu	256-16(%rsi),%xmm9 | 
|  | .Laad_loop_blockbyblock__func1: | 
|  | vmovdqu	(%rdx),%xmm0 | 
|  | vpshufb	%xmm4,%xmm0,%xmm0 | 
|  | vpxor	%xmm0,%xmm5,%xmm5 | 
|  | vpclmulqdq	$0x00,%xmm9,%xmm5,%xmm0 | 
|  | vpclmulqdq	$0x01,%xmm9,%xmm5,%xmm1 | 
|  | vpclmulqdq	$0x10,%xmm9,%xmm5,%xmm2 | 
|  | vpxord	%xmm2,%xmm1,%xmm1 | 
|  | vpclmulqdq	$0x01,%xmm0,%xmm10,%xmm2 | 
|  | vpshufd	$0x4e,%xmm0,%xmm0 | 
|  | vpternlogd	$0x96,%xmm2,%xmm0,%xmm1 | 
|  | vpclmulqdq	$0x11,%xmm9,%xmm5,%xmm5 | 
|  | vpclmulqdq	$0x01,%xmm1,%xmm10,%xmm0 | 
|  | vpshufd	$0x4e,%xmm1,%xmm1 | 
|  | vpternlogd	$0x96,%xmm0,%xmm1,%xmm5 | 
|  |  | 
|  | addq	$16,%rdx | 
|  | subq	$16,%rcx | 
|  | jnz	.Laad_loop_blockbyblock__func1 | 
|  |  | 
|  | .Laad_done__func1: | 
|  |  | 
|  | vpshufb	%xmm4,%xmm5,%xmm5 | 
|  | vmovdqu	%xmm5,(%rdi) | 
|  | ret | 
|  |  | 
|  | .cfi_endproc | 
|  | .size	gcm_ghash_vpclmulqdq_avx10_512, . - gcm_ghash_vpclmulqdq_avx10_512 | 
|  | .globl	aes_gcm_enc_update_vaes_avx10_512 | 
|  | .hidden aes_gcm_enc_update_vaes_avx10_512 | 
|  | .type	aes_gcm_enc_update_vaes_avx10_512,@function | 
|  | .align	32 | 
|  | aes_gcm_enc_update_vaes_avx10_512: | 
|  | .cfi_startproc | 
|  |  | 
|  | _CET_ENDBR | 
|  | pushq	%r12 | 
|  | .cfi_adjust_cfa_offset	8 | 
|  | .cfi_offset	%r12,-16 | 
|  |  | 
|  | movq	16(%rsp),%r12 | 
|  | #ifdef BORINGSSL_DISPATCH_TEST | 
|  | .extern	BORINGSSL_function_hit | 
|  | .hidden BORINGSSL_function_hit | 
|  | movb	$1,BORINGSSL_function_hit+7(%rip) | 
|  | #endif | 
|  |  | 
|  | vbroadcasti32x4	.Lbswap_mask(%rip),%zmm8 | 
|  | vbroadcasti32x4	.Lgfpoly(%rip),%zmm31 | 
|  |  | 
|  |  | 
|  |  | 
|  | vmovdqu	(%r12),%xmm10 | 
|  | vpshufb	%xmm8,%xmm10,%xmm10 | 
|  | vbroadcasti32x4	(%r8),%zmm12 | 
|  | vpshufb	%zmm8,%zmm12,%zmm12 | 
|  |  | 
|  |  | 
|  |  | 
|  | movl	240(%rcx),%r10d | 
|  | leal	-20(,%r10,4),%r10d | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  | leaq	96(%rcx,%r10,4),%r11 | 
|  | vbroadcasti32x4	(%rcx),%zmm13 | 
|  | vbroadcasti32x4	(%r11),%zmm14 | 
|  |  | 
|  |  | 
|  | vpaddd	.Lctr_pattern(%rip),%zmm12,%zmm12 | 
|  |  | 
|  |  | 
|  | vbroadcasti32x4	.Linc_4blocks(%rip),%zmm11 | 
|  |  | 
|  |  | 
|  |  | 
|  | cmpq	$256-1,%rdx | 
|  | jbe	.Lcrypt_loop_4x_done__func1 | 
|  |  | 
|  |  | 
|  | vmovdqu8	256-256(%r9),%zmm27 | 
|  | vmovdqu8	256-192(%r9),%zmm28 | 
|  | vmovdqu8	256-128(%r9),%zmm29 | 
|  | vmovdqu8	256-64(%r9),%zmm30 | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  | vpshufb	%zmm8,%zmm12,%zmm0 | 
|  | vpaddd	%zmm11,%zmm12,%zmm12 | 
|  | vpshufb	%zmm8,%zmm12,%zmm1 | 
|  | vpaddd	%zmm11,%zmm12,%zmm12 | 
|  | vpshufb	%zmm8,%zmm12,%zmm2 | 
|  | vpaddd	%zmm11,%zmm12,%zmm12 | 
|  | vpshufb	%zmm8,%zmm12,%zmm3 | 
|  | vpaddd	%zmm11,%zmm12,%zmm12 | 
|  |  | 
|  |  | 
|  | vpxord	%zmm13,%zmm0,%zmm0 | 
|  | vpxord	%zmm13,%zmm1,%zmm1 | 
|  | vpxord	%zmm13,%zmm2,%zmm2 | 
|  | vpxord	%zmm13,%zmm3,%zmm3 | 
|  |  | 
|  | leaq	16(%rcx),%rax | 
|  | .Lvaesenc_loop_first_4_vecs__func1: | 
|  | vbroadcasti32x4	(%rax),%zmm9 | 
|  | vaesenc	%zmm9,%zmm0,%zmm0 | 
|  | vaesenc	%zmm9,%zmm1,%zmm1 | 
|  | vaesenc	%zmm9,%zmm2,%zmm2 | 
|  | vaesenc	%zmm9,%zmm3,%zmm3 | 
|  |  | 
|  | addq	$16,%rax | 
|  | cmpq	%rax,%r11 | 
|  | jne	.Lvaesenc_loop_first_4_vecs__func1 | 
|  |  | 
|  |  | 
|  |  | 
|  | vpxord	0(%rdi),%zmm14,%zmm4 | 
|  | vpxord	64(%rdi),%zmm14,%zmm5 | 
|  | vpxord	128(%rdi),%zmm14,%zmm6 | 
|  | vpxord	192(%rdi),%zmm14,%zmm7 | 
|  |  | 
|  |  | 
|  |  | 
|  | vaesenclast	%zmm4,%zmm0,%zmm4 | 
|  | vaesenclast	%zmm5,%zmm1,%zmm5 | 
|  | vaesenclast	%zmm6,%zmm2,%zmm6 | 
|  | vaesenclast	%zmm7,%zmm3,%zmm7 | 
|  |  | 
|  |  | 
|  | vmovdqu8	%zmm4,0(%rsi) | 
|  | vmovdqu8	%zmm5,64(%rsi) | 
|  | vmovdqu8	%zmm6,128(%rsi) | 
|  | vmovdqu8	%zmm7,192(%rsi) | 
|  |  | 
|  | subq	$-256,%rdi | 
|  | subq	$-256,%rsi | 
|  | addq	$-256,%rdx | 
|  | cmpq	$256-1,%rdx | 
|  | jbe	.Lghash_last_ciphertext_4x__func1 | 
|  | vbroadcasti32x4	-144(%r11),%zmm15 | 
|  | vbroadcasti32x4	-128(%r11),%zmm16 | 
|  | vbroadcasti32x4	-112(%r11),%zmm17 | 
|  | vbroadcasti32x4	-96(%r11),%zmm18 | 
|  | vbroadcasti32x4	-80(%r11),%zmm19 | 
|  | vbroadcasti32x4	-64(%r11),%zmm20 | 
|  | vbroadcasti32x4	-48(%r11),%zmm21 | 
|  | vbroadcasti32x4	-32(%r11),%zmm22 | 
|  | vbroadcasti32x4	-16(%r11),%zmm23 | 
|  | .Lcrypt_loop_4x__func1: | 
|  |  | 
|  |  | 
|  |  | 
|  | vpshufb	%zmm8,%zmm12,%zmm0 | 
|  | vpaddd	%zmm11,%zmm12,%zmm12 | 
|  | vpshufb	%zmm8,%zmm12,%zmm1 | 
|  | vpaddd	%zmm11,%zmm12,%zmm12 | 
|  | vpshufb	%zmm8,%zmm12,%zmm2 | 
|  | vpaddd	%zmm11,%zmm12,%zmm12 | 
|  | vpshufb	%zmm8,%zmm12,%zmm3 | 
|  | vpaddd	%zmm11,%zmm12,%zmm12 | 
|  |  | 
|  |  | 
|  | vpxord	%zmm13,%zmm0,%zmm0 | 
|  | vpxord	%zmm13,%zmm1,%zmm1 | 
|  | vpxord	%zmm13,%zmm2,%zmm2 | 
|  | vpxord	%zmm13,%zmm3,%zmm3 | 
|  |  | 
|  | cmpl	$24,%r10d | 
|  | jl	.Laes128__func1 | 
|  | je	.Laes192__func1 | 
|  |  | 
|  | vbroadcasti32x4	-208(%r11),%zmm9 | 
|  | vaesenc	%zmm9,%zmm0,%zmm0 | 
|  | vaesenc	%zmm9,%zmm1,%zmm1 | 
|  | vaesenc	%zmm9,%zmm2,%zmm2 | 
|  | vaesenc	%zmm9,%zmm3,%zmm3 | 
|  |  | 
|  | vbroadcasti32x4	-192(%r11),%zmm9 | 
|  | vaesenc	%zmm9,%zmm0,%zmm0 | 
|  | vaesenc	%zmm9,%zmm1,%zmm1 | 
|  | vaesenc	%zmm9,%zmm2,%zmm2 | 
|  | vaesenc	%zmm9,%zmm3,%zmm3 | 
|  |  | 
|  | .Laes192__func1: | 
|  | vbroadcasti32x4	-176(%r11),%zmm9 | 
|  | vaesenc	%zmm9,%zmm0,%zmm0 | 
|  | vaesenc	%zmm9,%zmm1,%zmm1 | 
|  | vaesenc	%zmm9,%zmm2,%zmm2 | 
|  | vaesenc	%zmm9,%zmm3,%zmm3 | 
|  |  | 
|  | vbroadcasti32x4	-160(%r11),%zmm9 | 
|  | vaesenc	%zmm9,%zmm0,%zmm0 | 
|  | vaesenc	%zmm9,%zmm1,%zmm1 | 
|  | vaesenc	%zmm9,%zmm2,%zmm2 | 
|  | vaesenc	%zmm9,%zmm3,%zmm3 | 
|  |  | 
|  | .Laes128__func1: | 
|  | prefetcht0	512+0(%rdi) | 
|  | prefetcht0	512+64(%rdi) | 
|  | prefetcht0	512+128(%rdi) | 
|  | prefetcht0	512+192(%rdi) | 
|  | vpshufb	%zmm8,%zmm4,%zmm4 | 
|  | vpxord	%zmm10,%zmm4,%zmm4 | 
|  | vpshufb	%zmm8,%zmm5,%zmm5 | 
|  | vpshufb	%zmm8,%zmm6,%zmm6 | 
|  |  | 
|  | vaesenc	%zmm15,%zmm0,%zmm0 | 
|  | vaesenc	%zmm15,%zmm1,%zmm1 | 
|  | vaesenc	%zmm15,%zmm2,%zmm2 | 
|  | vaesenc	%zmm15,%zmm3,%zmm3 | 
|  |  | 
|  | vpshufb	%zmm8,%zmm7,%zmm7 | 
|  | vpclmulqdq	$0x00,%zmm27,%zmm4,%zmm10 | 
|  | vpclmulqdq	$0x00,%zmm28,%zmm5,%zmm24 | 
|  | vpclmulqdq	$0x00,%zmm29,%zmm6,%zmm25 | 
|  |  | 
|  | vaesenc	%zmm16,%zmm0,%zmm0 | 
|  | vaesenc	%zmm16,%zmm1,%zmm1 | 
|  | vaesenc	%zmm16,%zmm2,%zmm2 | 
|  | vaesenc	%zmm16,%zmm3,%zmm3 | 
|  |  | 
|  | vpxord	%zmm24,%zmm10,%zmm10 | 
|  | vpclmulqdq	$0x00,%zmm30,%zmm7,%zmm26 | 
|  | vpternlogd	$0x96,%zmm26,%zmm25,%zmm10 | 
|  | vpclmulqdq	$0x01,%zmm27,%zmm4,%zmm24 | 
|  |  | 
|  | vaesenc	%zmm17,%zmm0,%zmm0 | 
|  | vaesenc	%zmm17,%zmm1,%zmm1 | 
|  | vaesenc	%zmm17,%zmm2,%zmm2 | 
|  | vaesenc	%zmm17,%zmm3,%zmm3 | 
|  |  | 
|  | vpclmulqdq	$0x01,%zmm28,%zmm5,%zmm25 | 
|  | vpclmulqdq	$0x01,%zmm29,%zmm6,%zmm26 | 
|  | vpternlogd	$0x96,%zmm26,%zmm25,%zmm24 | 
|  | vpclmulqdq	$0x01,%zmm30,%zmm7,%zmm25 | 
|  |  | 
|  | vaesenc	%zmm18,%zmm0,%zmm0 | 
|  | vaesenc	%zmm18,%zmm1,%zmm1 | 
|  | vaesenc	%zmm18,%zmm2,%zmm2 | 
|  | vaesenc	%zmm18,%zmm3,%zmm3 | 
|  |  | 
|  | vpclmulqdq	$0x10,%zmm27,%zmm4,%zmm26 | 
|  | vpternlogd	$0x96,%zmm26,%zmm25,%zmm24 | 
|  | vpclmulqdq	$0x10,%zmm28,%zmm5,%zmm25 | 
|  | vpclmulqdq	$0x10,%zmm29,%zmm6,%zmm26 | 
|  |  | 
|  | vaesenc	%zmm19,%zmm0,%zmm0 | 
|  | vaesenc	%zmm19,%zmm1,%zmm1 | 
|  | vaesenc	%zmm19,%zmm2,%zmm2 | 
|  | vaesenc	%zmm19,%zmm3,%zmm3 | 
|  |  | 
|  | vpternlogd	$0x96,%zmm26,%zmm25,%zmm24 | 
|  | vpclmulqdq	$0x01,%zmm10,%zmm31,%zmm26 | 
|  | vpclmulqdq	$0x10,%zmm30,%zmm7,%zmm25 | 
|  | vpxord	%zmm25,%zmm24,%zmm24 | 
|  |  | 
|  | vaesenc	%zmm20,%zmm0,%zmm0 | 
|  | vaesenc	%zmm20,%zmm1,%zmm1 | 
|  | vaesenc	%zmm20,%zmm2,%zmm2 | 
|  | vaesenc	%zmm20,%zmm3,%zmm3 | 
|  |  | 
|  | vpshufd	$0x4e,%zmm10,%zmm10 | 
|  | vpclmulqdq	$0x11,%zmm27,%zmm4,%zmm4 | 
|  | vpclmulqdq	$0x11,%zmm28,%zmm5,%zmm5 | 
|  | vpclmulqdq	$0x11,%zmm29,%zmm6,%zmm6 | 
|  |  | 
|  | vaesenc	%zmm21,%zmm0,%zmm0 | 
|  | vaesenc	%zmm21,%zmm1,%zmm1 | 
|  | vaesenc	%zmm21,%zmm2,%zmm2 | 
|  | vaesenc	%zmm21,%zmm3,%zmm3 | 
|  |  | 
|  | vpternlogd	$0x96,%zmm26,%zmm10,%zmm24 | 
|  | vpclmulqdq	$0x11,%zmm30,%zmm7,%zmm7 | 
|  | vpternlogd	$0x96,%zmm6,%zmm5,%zmm4 | 
|  | vpclmulqdq	$0x01,%zmm24,%zmm31,%zmm25 | 
|  |  | 
|  | vaesenc	%zmm22,%zmm0,%zmm0 | 
|  | vaesenc	%zmm22,%zmm1,%zmm1 | 
|  | vaesenc	%zmm22,%zmm2,%zmm2 | 
|  | vaesenc	%zmm22,%zmm3,%zmm3 | 
|  |  | 
|  | vpxord	%zmm7,%zmm4,%zmm10 | 
|  | vpshufd	$0x4e,%zmm24,%zmm24 | 
|  | vpternlogd	$0x96,%zmm25,%zmm24,%zmm10 | 
|  |  | 
|  | vaesenc	%zmm23,%zmm0,%zmm0 | 
|  | vaesenc	%zmm23,%zmm1,%zmm1 | 
|  | vaesenc	%zmm23,%zmm2,%zmm2 | 
|  | vaesenc	%zmm23,%zmm3,%zmm3 | 
|  |  | 
|  | vextracti32x4	$1,%zmm10,%xmm4 | 
|  | vextracti32x4	$2,%zmm10,%xmm5 | 
|  | vextracti32x4	$3,%zmm10,%xmm6 | 
|  | vpxord	%xmm4,%xmm10,%xmm10 | 
|  | vpternlogd	$0x96,%xmm5,%xmm6,%xmm10 | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  | vpxord	0(%rdi),%zmm14,%zmm4 | 
|  | vpxord	64(%rdi),%zmm14,%zmm5 | 
|  | vpxord	128(%rdi),%zmm14,%zmm6 | 
|  | vpxord	192(%rdi),%zmm14,%zmm7 | 
|  |  | 
|  |  | 
|  |  | 
|  | vaesenclast	%zmm4,%zmm0,%zmm4 | 
|  | vaesenclast	%zmm5,%zmm1,%zmm5 | 
|  | vaesenclast	%zmm6,%zmm2,%zmm6 | 
|  | vaesenclast	%zmm7,%zmm3,%zmm7 | 
|  |  | 
|  |  | 
|  | vmovdqu8	%zmm4,0(%rsi) | 
|  | vmovdqu8	%zmm5,64(%rsi) | 
|  | vmovdqu8	%zmm6,128(%rsi) | 
|  | vmovdqu8	%zmm7,192(%rsi) | 
|  |  | 
|  | subq	$-256,%rdi | 
|  | subq	$-256,%rsi | 
|  | addq	$-256,%rdx | 
|  | cmpq	$256-1,%rdx | 
|  | ja	.Lcrypt_loop_4x__func1 | 
|  | .Lghash_last_ciphertext_4x__func1: | 
|  | vpshufb	%zmm8,%zmm4,%zmm4 | 
|  | vpxord	%zmm10,%zmm4,%zmm4 | 
|  | vpshufb	%zmm8,%zmm5,%zmm5 | 
|  | vpshufb	%zmm8,%zmm6,%zmm6 | 
|  | vpshufb	%zmm8,%zmm7,%zmm7 | 
|  | vpclmulqdq	$0x00,%zmm27,%zmm4,%zmm10 | 
|  | vpclmulqdq	$0x00,%zmm28,%zmm5,%zmm24 | 
|  | vpclmulqdq	$0x00,%zmm29,%zmm6,%zmm25 | 
|  | vpxord	%zmm24,%zmm10,%zmm10 | 
|  | vpclmulqdq	$0x00,%zmm30,%zmm7,%zmm26 | 
|  | vpternlogd	$0x96,%zmm26,%zmm25,%zmm10 | 
|  | vpclmulqdq	$0x01,%zmm27,%zmm4,%zmm24 | 
|  | vpclmulqdq	$0x01,%zmm28,%zmm5,%zmm25 | 
|  | vpclmulqdq	$0x01,%zmm29,%zmm6,%zmm26 | 
|  | vpternlogd	$0x96,%zmm26,%zmm25,%zmm24 | 
|  | vpclmulqdq	$0x01,%zmm30,%zmm7,%zmm25 | 
|  | vpclmulqdq	$0x10,%zmm27,%zmm4,%zmm26 | 
|  | vpternlogd	$0x96,%zmm26,%zmm25,%zmm24 | 
|  | vpclmulqdq	$0x10,%zmm28,%zmm5,%zmm25 | 
|  | vpclmulqdq	$0x10,%zmm29,%zmm6,%zmm26 | 
|  | vpternlogd	$0x96,%zmm26,%zmm25,%zmm24 | 
|  | vpclmulqdq	$0x01,%zmm10,%zmm31,%zmm26 | 
|  | vpclmulqdq	$0x10,%zmm30,%zmm7,%zmm25 | 
|  | vpxord	%zmm25,%zmm24,%zmm24 | 
|  | vpshufd	$0x4e,%zmm10,%zmm10 | 
|  | vpclmulqdq	$0x11,%zmm27,%zmm4,%zmm4 | 
|  | vpclmulqdq	$0x11,%zmm28,%zmm5,%zmm5 | 
|  | vpclmulqdq	$0x11,%zmm29,%zmm6,%zmm6 | 
|  | vpternlogd	$0x96,%zmm26,%zmm10,%zmm24 | 
|  | vpclmulqdq	$0x11,%zmm30,%zmm7,%zmm7 | 
|  | vpternlogd	$0x96,%zmm6,%zmm5,%zmm4 | 
|  | vpclmulqdq	$0x01,%zmm24,%zmm31,%zmm25 | 
|  | vpxord	%zmm7,%zmm4,%zmm10 | 
|  | vpshufd	$0x4e,%zmm24,%zmm24 | 
|  | vpternlogd	$0x96,%zmm25,%zmm24,%zmm10 | 
|  | vextracti32x4	$1,%zmm10,%xmm4 | 
|  | vextracti32x4	$2,%zmm10,%xmm5 | 
|  | vextracti32x4	$3,%zmm10,%xmm6 | 
|  | vpxord	%xmm4,%xmm10,%xmm10 | 
|  | vpternlogd	$0x96,%xmm5,%xmm6,%xmm10 | 
|  |  | 
|  | .Lcrypt_loop_4x_done__func1: | 
|  |  | 
|  | testq	%rdx,%rdx | 
|  | jz	.Ldone__func1 | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  | movq	%rdx,%rax | 
|  | negq	%rax | 
|  | andq	$-16,%rax | 
|  | leaq	256(%r9,%rax,1),%r8 | 
|  | vpxor	%xmm4,%xmm4,%xmm4 | 
|  | vpxor	%xmm5,%xmm5,%xmm5 | 
|  | vpxor	%xmm6,%xmm6,%xmm6 | 
|  |  | 
|  | cmpq	$64,%rdx | 
|  | jb	.Lpartial_vec__func1 | 
|  |  | 
|  | .Lcrypt_loop_1x__func1: | 
|  |  | 
|  |  | 
|  |  | 
|  | vpshufb	%zmm8,%zmm12,%zmm0 | 
|  | vpaddd	%zmm11,%zmm12,%zmm12 | 
|  | vpxord	%zmm13,%zmm0,%zmm0 | 
|  | leaq	16(%rcx),%rax | 
|  | .Lvaesenc_loop_tail_full_vec__func1: | 
|  | vbroadcasti32x4	(%rax),%zmm9 | 
|  | vaesenc	%zmm9,%zmm0,%zmm0 | 
|  | addq	$16,%rax | 
|  | cmpq	%rax,%r11 | 
|  | jne	.Lvaesenc_loop_tail_full_vec__func1 | 
|  | vaesenclast	%zmm14,%zmm0,%zmm0 | 
|  |  | 
|  |  | 
|  | vmovdqu8	(%rdi),%zmm1 | 
|  | vpxord	%zmm1,%zmm0,%zmm0 | 
|  | vmovdqu8	%zmm0,(%rsi) | 
|  |  | 
|  |  | 
|  | vmovdqu8	(%r8),%zmm30 | 
|  | vpshufb	%zmm8,%zmm0,%zmm0 | 
|  | vpxord	%zmm10,%zmm0,%zmm0 | 
|  | vpclmulqdq	$0x00,%zmm30,%zmm0,%zmm7 | 
|  | vpclmulqdq	$0x01,%zmm30,%zmm0,%zmm1 | 
|  | vpclmulqdq	$0x10,%zmm30,%zmm0,%zmm2 | 
|  | vpclmulqdq	$0x11,%zmm30,%zmm0,%zmm3 | 
|  | vpxord	%zmm7,%zmm4,%zmm4 | 
|  | vpternlogd	$0x96,%zmm2,%zmm1,%zmm5 | 
|  | vpxord	%zmm3,%zmm6,%zmm6 | 
|  |  | 
|  | vpxor	%xmm10,%xmm10,%xmm10 | 
|  |  | 
|  | addq	$64,%r8 | 
|  | addq	$64,%rdi | 
|  | addq	$64,%rsi | 
|  | subq	$64,%rdx | 
|  | cmpq	$64,%rdx | 
|  | jae	.Lcrypt_loop_1x__func1 | 
|  |  | 
|  | testq	%rdx,%rdx | 
|  | jz	.Lreduce__func1 | 
|  |  | 
|  | .Lpartial_vec__func1: | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  | movq	$-1,%rax | 
|  | bzhiq	%rdx,%rax,%rax | 
|  | kmovq	%rax,%k1 | 
|  | addq	$15,%rdx | 
|  | andq	$-16,%rdx | 
|  | movq	$-1,%rax | 
|  | bzhiq	%rdx,%rax,%rax | 
|  | kmovq	%rax,%k2 | 
|  |  | 
|  |  | 
|  |  | 
|  | vpshufb	%zmm8,%zmm12,%zmm0 | 
|  | vpxord	%zmm13,%zmm0,%zmm0 | 
|  | leaq	16(%rcx),%rax | 
|  | .Lvaesenc_loop_tail_partialvec__func1: | 
|  | vbroadcasti32x4	(%rax),%zmm9 | 
|  | vaesenc	%zmm9,%zmm0,%zmm0 | 
|  | addq	$16,%rax | 
|  | cmpq	%rax,%r11 | 
|  | jne	.Lvaesenc_loop_tail_partialvec__func1 | 
|  | vaesenclast	%zmm14,%zmm0,%zmm0 | 
|  |  | 
|  |  | 
|  | vmovdqu8	(%rdi),%zmm1{%k1}{z} | 
|  | vpxord	%zmm1,%zmm0,%zmm0 | 
|  | vmovdqu8	%zmm0,(%rsi){%k1} | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  | vmovdqu8	(%r8),%zmm30{%k2}{z} | 
|  | vmovdqu8	%zmm0,%zmm1{%k1}{z} | 
|  | vpshufb	%zmm8,%zmm1,%zmm0 | 
|  | vpxord	%zmm10,%zmm0,%zmm0 | 
|  | vpclmulqdq	$0x00,%zmm30,%zmm0,%zmm7 | 
|  | vpclmulqdq	$0x01,%zmm30,%zmm0,%zmm1 | 
|  | vpclmulqdq	$0x10,%zmm30,%zmm0,%zmm2 | 
|  | vpclmulqdq	$0x11,%zmm30,%zmm0,%zmm3 | 
|  | vpxord	%zmm7,%zmm4,%zmm4 | 
|  | vpternlogd	$0x96,%zmm2,%zmm1,%zmm5 | 
|  | vpxord	%zmm3,%zmm6,%zmm6 | 
|  |  | 
|  |  | 
|  | .Lreduce__func1: | 
|  |  | 
|  | vpclmulqdq	$0x01,%zmm4,%zmm31,%zmm0 | 
|  | vpshufd	$0x4e,%zmm4,%zmm4 | 
|  | vpternlogd	$0x96,%zmm0,%zmm4,%zmm5 | 
|  | vpclmulqdq	$0x01,%zmm5,%zmm31,%zmm0 | 
|  | vpshufd	$0x4e,%zmm5,%zmm5 | 
|  | vpternlogd	$0x96,%zmm0,%zmm5,%zmm6 | 
|  |  | 
|  | vextracti32x4	$1,%zmm6,%xmm0 | 
|  | vextracti32x4	$2,%zmm6,%xmm1 | 
|  | vextracti32x4	$3,%zmm6,%xmm2 | 
|  | vpxord	%xmm0,%xmm6,%xmm10 | 
|  | vpternlogd	$0x96,%xmm1,%xmm2,%xmm10 | 
|  |  | 
|  |  | 
|  | .Ldone__func1: | 
|  |  | 
|  | vpshufb	%xmm8,%xmm10,%xmm10 | 
|  | vmovdqu	%xmm10,(%r12) | 
|  |  | 
|  | vzeroupper | 
|  | popq	%r12 | 
|  | .cfi_adjust_cfa_offset	-8 | 
|  | .cfi_restore	%r12 | 
|  | ret | 
|  |  | 
|  | .cfi_endproc | 
|  | .size	aes_gcm_enc_update_vaes_avx10_512, . - aes_gcm_enc_update_vaes_avx10_512 | 
|  | .globl	aes_gcm_dec_update_vaes_avx10_512 | 
|  | .hidden aes_gcm_dec_update_vaes_avx10_512 | 
|  | .type	aes_gcm_dec_update_vaes_avx10_512,@function | 
|  | .align	32 | 
|  | aes_gcm_dec_update_vaes_avx10_512: | 
|  | .cfi_startproc | 
|  |  | 
|  | _CET_ENDBR | 
|  | pushq	%r12 | 
|  | .cfi_adjust_cfa_offset	8 | 
|  | .cfi_offset	%r12,-16 | 
|  |  | 
|  | movq	16(%rsp),%r12 | 
|  |  | 
|  | vbroadcasti32x4	.Lbswap_mask(%rip),%zmm8 | 
|  | vbroadcasti32x4	.Lgfpoly(%rip),%zmm31 | 
|  |  | 
|  |  | 
|  |  | 
|  | vmovdqu	(%r12),%xmm10 | 
|  | vpshufb	%xmm8,%xmm10,%xmm10 | 
|  | vbroadcasti32x4	(%r8),%zmm12 | 
|  | vpshufb	%zmm8,%zmm12,%zmm12 | 
|  |  | 
|  |  | 
|  |  | 
|  | movl	240(%rcx),%r10d | 
|  | leal	-20(,%r10,4),%r10d | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  | leaq	96(%rcx,%r10,4),%r11 | 
|  | vbroadcasti32x4	(%rcx),%zmm13 | 
|  | vbroadcasti32x4	(%r11),%zmm14 | 
|  |  | 
|  |  | 
|  | vpaddd	.Lctr_pattern(%rip),%zmm12,%zmm12 | 
|  |  | 
|  |  | 
|  | vbroadcasti32x4	.Linc_4blocks(%rip),%zmm11 | 
|  |  | 
|  |  | 
|  |  | 
|  | cmpq	$256-1,%rdx | 
|  | jbe	.Lcrypt_loop_4x_done__func2 | 
|  |  | 
|  |  | 
|  | vmovdqu8	256-256(%r9),%zmm27 | 
|  | vmovdqu8	256-192(%r9),%zmm28 | 
|  | vmovdqu8	256-128(%r9),%zmm29 | 
|  | vmovdqu8	256-64(%r9),%zmm30 | 
|  | vbroadcasti32x4	-144(%r11),%zmm15 | 
|  | vbroadcasti32x4	-128(%r11),%zmm16 | 
|  | vbroadcasti32x4	-112(%r11),%zmm17 | 
|  | vbroadcasti32x4	-96(%r11),%zmm18 | 
|  | vbroadcasti32x4	-80(%r11),%zmm19 | 
|  | vbroadcasti32x4	-64(%r11),%zmm20 | 
|  | vbroadcasti32x4	-48(%r11),%zmm21 | 
|  | vbroadcasti32x4	-32(%r11),%zmm22 | 
|  | vbroadcasti32x4	-16(%r11),%zmm23 | 
|  | .Lcrypt_loop_4x__func2: | 
|  | vmovdqu8	0(%rdi),%zmm4 | 
|  | vmovdqu8	64(%rdi),%zmm5 | 
|  | vmovdqu8	128(%rdi),%zmm6 | 
|  | vmovdqu8	192(%rdi),%zmm7 | 
|  |  | 
|  |  | 
|  |  | 
|  | vpshufb	%zmm8,%zmm12,%zmm0 | 
|  | vpaddd	%zmm11,%zmm12,%zmm12 | 
|  | vpshufb	%zmm8,%zmm12,%zmm1 | 
|  | vpaddd	%zmm11,%zmm12,%zmm12 | 
|  | vpshufb	%zmm8,%zmm12,%zmm2 | 
|  | vpaddd	%zmm11,%zmm12,%zmm12 | 
|  | vpshufb	%zmm8,%zmm12,%zmm3 | 
|  | vpaddd	%zmm11,%zmm12,%zmm12 | 
|  |  | 
|  |  | 
|  | vpxord	%zmm13,%zmm0,%zmm0 | 
|  | vpxord	%zmm13,%zmm1,%zmm1 | 
|  | vpxord	%zmm13,%zmm2,%zmm2 | 
|  | vpxord	%zmm13,%zmm3,%zmm3 | 
|  |  | 
|  | cmpl	$24,%r10d | 
|  | jl	.Laes128__func2 | 
|  | je	.Laes192__func2 | 
|  |  | 
|  | vbroadcasti32x4	-208(%r11),%zmm9 | 
|  | vaesenc	%zmm9,%zmm0,%zmm0 | 
|  | vaesenc	%zmm9,%zmm1,%zmm1 | 
|  | vaesenc	%zmm9,%zmm2,%zmm2 | 
|  | vaesenc	%zmm9,%zmm3,%zmm3 | 
|  |  | 
|  | vbroadcasti32x4	-192(%r11),%zmm9 | 
|  | vaesenc	%zmm9,%zmm0,%zmm0 | 
|  | vaesenc	%zmm9,%zmm1,%zmm1 | 
|  | vaesenc	%zmm9,%zmm2,%zmm2 | 
|  | vaesenc	%zmm9,%zmm3,%zmm3 | 
|  |  | 
|  | .Laes192__func2: | 
|  | vbroadcasti32x4	-176(%r11),%zmm9 | 
|  | vaesenc	%zmm9,%zmm0,%zmm0 | 
|  | vaesenc	%zmm9,%zmm1,%zmm1 | 
|  | vaesenc	%zmm9,%zmm2,%zmm2 | 
|  | vaesenc	%zmm9,%zmm3,%zmm3 | 
|  |  | 
|  | vbroadcasti32x4	-160(%r11),%zmm9 | 
|  | vaesenc	%zmm9,%zmm0,%zmm0 | 
|  | vaesenc	%zmm9,%zmm1,%zmm1 | 
|  | vaesenc	%zmm9,%zmm2,%zmm2 | 
|  | vaesenc	%zmm9,%zmm3,%zmm3 | 
|  |  | 
|  | .Laes128__func2: | 
|  | prefetcht0	512+0(%rdi) | 
|  | prefetcht0	512+64(%rdi) | 
|  | prefetcht0	512+128(%rdi) | 
|  | prefetcht0	512+192(%rdi) | 
|  | vpshufb	%zmm8,%zmm4,%zmm4 | 
|  | vpxord	%zmm10,%zmm4,%zmm4 | 
|  | vpshufb	%zmm8,%zmm5,%zmm5 | 
|  | vpshufb	%zmm8,%zmm6,%zmm6 | 
|  |  | 
|  | vaesenc	%zmm15,%zmm0,%zmm0 | 
|  | vaesenc	%zmm15,%zmm1,%zmm1 | 
|  | vaesenc	%zmm15,%zmm2,%zmm2 | 
|  | vaesenc	%zmm15,%zmm3,%zmm3 | 
|  |  | 
|  | vpshufb	%zmm8,%zmm7,%zmm7 | 
|  | vpclmulqdq	$0x00,%zmm27,%zmm4,%zmm10 | 
|  | vpclmulqdq	$0x00,%zmm28,%zmm5,%zmm24 | 
|  | vpclmulqdq	$0x00,%zmm29,%zmm6,%zmm25 | 
|  |  | 
|  | vaesenc	%zmm16,%zmm0,%zmm0 | 
|  | vaesenc	%zmm16,%zmm1,%zmm1 | 
|  | vaesenc	%zmm16,%zmm2,%zmm2 | 
|  | vaesenc	%zmm16,%zmm3,%zmm3 | 
|  |  | 
|  | vpxord	%zmm24,%zmm10,%zmm10 | 
|  | vpclmulqdq	$0x00,%zmm30,%zmm7,%zmm26 | 
|  | vpternlogd	$0x96,%zmm26,%zmm25,%zmm10 | 
|  | vpclmulqdq	$0x01,%zmm27,%zmm4,%zmm24 | 
|  |  | 
|  | vaesenc	%zmm17,%zmm0,%zmm0 | 
|  | vaesenc	%zmm17,%zmm1,%zmm1 | 
|  | vaesenc	%zmm17,%zmm2,%zmm2 | 
|  | vaesenc	%zmm17,%zmm3,%zmm3 | 
|  |  | 
|  | vpclmulqdq	$0x01,%zmm28,%zmm5,%zmm25 | 
|  | vpclmulqdq	$0x01,%zmm29,%zmm6,%zmm26 | 
|  | vpternlogd	$0x96,%zmm26,%zmm25,%zmm24 | 
|  | vpclmulqdq	$0x01,%zmm30,%zmm7,%zmm25 | 
|  |  | 
|  | vaesenc	%zmm18,%zmm0,%zmm0 | 
|  | vaesenc	%zmm18,%zmm1,%zmm1 | 
|  | vaesenc	%zmm18,%zmm2,%zmm2 | 
|  | vaesenc	%zmm18,%zmm3,%zmm3 | 
|  |  | 
|  | vpclmulqdq	$0x10,%zmm27,%zmm4,%zmm26 | 
|  | vpternlogd	$0x96,%zmm26,%zmm25,%zmm24 | 
|  | vpclmulqdq	$0x10,%zmm28,%zmm5,%zmm25 | 
|  | vpclmulqdq	$0x10,%zmm29,%zmm6,%zmm26 | 
|  |  | 
|  | vaesenc	%zmm19,%zmm0,%zmm0 | 
|  | vaesenc	%zmm19,%zmm1,%zmm1 | 
|  | vaesenc	%zmm19,%zmm2,%zmm2 | 
|  | vaesenc	%zmm19,%zmm3,%zmm3 | 
|  |  | 
|  | vpternlogd	$0x96,%zmm26,%zmm25,%zmm24 | 
|  | vpclmulqdq	$0x01,%zmm10,%zmm31,%zmm26 | 
|  | vpclmulqdq	$0x10,%zmm30,%zmm7,%zmm25 | 
|  | vpxord	%zmm25,%zmm24,%zmm24 | 
|  |  | 
|  | vaesenc	%zmm20,%zmm0,%zmm0 | 
|  | vaesenc	%zmm20,%zmm1,%zmm1 | 
|  | vaesenc	%zmm20,%zmm2,%zmm2 | 
|  | vaesenc	%zmm20,%zmm3,%zmm3 | 
|  |  | 
|  | vpshufd	$0x4e,%zmm10,%zmm10 | 
|  | vpclmulqdq	$0x11,%zmm27,%zmm4,%zmm4 | 
|  | vpclmulqdq	$0x11,%zmm28,%zmm5,%zmm5 | 
|  | vpclmulqdq	$0x11,%zmm29,%zmm6,%zmm6 | 
|  |  | 
|  | vaesenc	%zmm21,%zmm0,%zmm0 | 
|  | vaesenc	%zmm21,%zmm1,%zmm1 | 
|  | vaesenc	%zmm21,%zmm2,%zmm2 | 
|  | vaesenc	%zmm21,%zmm3,%zmm3 | 
|  |  | 
|  | vpternlogd	$0x96,%zmm26,%zmm10,%zmm24 | 
|  | vpclmulqdq	$0x11,%zmm30,%zmm7,%zmm7 | 
|  | vpternlogd	$0x96,%zmm6,%zmm5,%zmm4 | 
|  | vpclmulqdq	$0x01,%zmm24,%zmm31,%zmm25 | 
|  |  | 
|  | vaesenc	%zmm22,%zmm0,%zmm0 | 
|  | vaesenc	%zmm22,%zmm1,%zmm1 | 
|  | vaesenc	%zmm22,%zmm2,%zmm2 | 
|  | vaesenc	%zmm22,%zmm3,%zmm3 | 
|  |  | 
|  | vpxord	%zmm7,%zmm4,%zmm10 | 
|  | vpshufd	$0x4e,%zmm24,%zmm24 | 
|  | vpternlogd	$0x96,%zmm25,%zmm24,%zmm10 | 
|  |  | 
|  | vaesenc	%zmm23,%zmm0,%zmm0 | 
|  | vaesenc	%zmm23,%zmm1,%zmm1 | 
|  | vaesenc	%zmm23,%zmm2,%zmm2 | 
|  | vaesenc	%zmm23,%zmm3,%zmm3 | 
|  |  | 
|  | vextracti32x4	$1,%zmm10,%xmm4 | 
|  | vextracti32x4	$2,%zmm10,%xmm5 | 
|  | vextracti32x4	$3,%zmm10,%xmm6 | 
|  | vpxord	%xmm4,%xmm10,%xmm10 | 
|  | vpternlogd	$0x96,%xmm5,%xmm6,%xmm10 | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  | vpxord	0(%rdi),%zmm14,%zmm4 | 
|  | vpxord	64(%rdi),%zmm14,%zmm5 | 
|  | vpxord	128(%rdi),%zmm14,%zmm6 | 
|  | vpxord	192(%rdi),%zmm14,%zmm7 | 
|  |  | 
|  |  | 
|  |  | 
|  | vaesenclast	%zmm4,%zmm0,%zmm4 | 
|  | vaesenclast	%zmm5,%zmm1,%zmm5 | 
|  | vaesenclast	%zmm6,%zmm2,%zmm6 | 
|  | vaesenclast	%zmm7,%zmm3,%zmm7 | 
|  |  | 
|  |  | 
|  | vmovdqu8	%zmm4,0(%rsi) | 
|  | vmovdqu8	%zmm5,64(%rsi) | 
|  | vmovdqu8	%zmm6,128(%rsi) | 
|  | vmovdqu8	%zmm7,192(%rsi) | 
|  |  | 
|  | subq	$-256,%rdi | 
|  | subq	$-256,%rsi | 
|  | addq	$-256,%rdx | 
|  | cmpq	$256-1,%rdx | 
|  | ja	.Lcrypt_loop_4x__func2 | 
|  | .Lcrypt_loop_4x_done__func2: | 
|  |  | 
|  | testq	%rdx,%rdx | 
|  | jz	.Ldone__func2 | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  | movq	%rdx,%rax | 
|  | negq	%rax | 
|  | andq	$-16,%rax | 
|  | leaq	256(%r9,%rax,1),%r8 | 
|  | vpxor	%xmm4,%xmm4,%xmm4 | 
|  | vpxor	%xmm5,%xmm5,%xmm5 | 
|  | vpxor	%xmm6,%xmm6,%xmm6 | 
|  |  | 
|  | cmpq	$64,%rdx | 
|  | jb	.Lpartial_vec__func2 | 
|  |  | 
|  | .Lcrypt_loop_1x__func2: | 
|  |  | 
|  |  | 
|  |  | 
|  | vpshufb	%zmm8,%zmm12,%zmm0 | 
|  | vpaddd	%zmm11,%zmm12,%zmm12 | 
|  | vpxord	%zmm13,%zmm0,%zmm0 | 
|  | leaq	16(%rcx),%rax | 
|  | .Lvaesenc_loop_tail_full_vec__func2: | 
|  | vbroadcasti32x4	(%rax),%zmm9 | 
|  | vaesenc	%zmm9,%zmm0,%zmm0 | 
|  | addq	$16,%rax | 
|  | cmpq	%rax,%r11 | 
|  | jne	.Lvaesenc_loop_tail_full_vec__func2 | 
|  | vaesenclast	%zmm14,%zmm0,%zmm0 | 
|  |  | 
|  |  | 
|  | vmovdqu8	(%rdi),%zmm1 | 
|  | vpxord	%zmm1,%zmm0,%zmm0 | 
|  | vmovdqu8	%zmm0,(%rsi) | 
|  |  | 
|  |  | 
|  | vmovdqu8	(%r8),%zmm30 | 
|  | vpshufb	%zmm8,%zmm1,%zmm0 | 
|  | vpxord	%zmm10,%zmm0,%zmm0 | 
|  | vpclmulqdq	$0x00,%zmm30,%zmm0,%zmm7 | 
|  | vpclmulqdq	$0x01,%zmm30,%zmm0,%zmm1 | 
|  | vpclmulqdq	$0x10,%zmm30,%zmm0,%zmm2 | 
|  | vpclmulqdq	$0x11,%zmm30,%zmm0,%zmm3 | 
|  | vpxord	%zmm7,%zmm4,%zmm4 | 
|  | vpternlogd	$0x96,%zmm2,%zmm1,%zmm5 | 
|  | vpxord	%zmm3,%zmm6,%zmm6 | 
|  |  | 
|  | vpxor	%xmm10,%xmm10,%xmm10 | 
|  |  | 
|  | addq	$64,%r8 | 
|  | addq	$64,%rdi | 
|  | addq	$64,%rsi | 
|  | subq	$64,%rdx | 
|  | cmpq	$64,%rdx | 
|  | jae	.Lcrypt_loop_1x__func2 | 
|  |  | 
|  | testq	%rdx,%rdx | 
|  | jz	.Lreduce__func2 | 
|  |  | 
|  | .Lpartial_vec__func2: | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  | movq	$-1,%rax | 
|  | bzhiq	%rdx,%rax,%rax | 
|  | kmovq	%rax,%k1 | 
|  | addq	$15,%rdx | 
|  | andq	$-16,%rdx | 
|  | movq	$-1,%rax | 
|  | bzhiq	%rdx,%rax,%rax | 
|  | kmovq	%rax,%k2 | 
|  |  | 
|  |  | 
|  |  | 
|  | vpshufb	%zmm8,%zmm12,%zmm0 | 
|  | vpxord	%zmm13,%zmm0,%zmm0 | 
|  | leaq	16(%rcx),%rax | 
|  | .Lvaesenc_loop_tail_partialvec__func2: | 
|  | vbroadcasti32x4	(%rax),%zmm9 | 
|  | vaesenc	%zmm9,%zmm0,%zmm0 | 
|  | addq	$16,%rax | 
|  | cmpq	%rax,%r11 | 
|  | jne	.Lvaesenc_loop_tail_partialvec__func2 | 
|  | vaesenclast	%zmm14,%zmm0,%zmm0 | 
|  |  | 
|  |  | 
|  | vmovdqu8	(%rdi),%zmm1{%k1}{z} | 
|  | vpxord	%zmm1,%zmm0,%zmm0 | 
|  | vmovdqu8	%zmm0,(%rsi){%k1} | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  | vmovdqu8	(%r8),%zmm30{%k2}{z} | 
|  |  | 
|  | vpshufb	%zmm8,%zmm1,%zmm0 | 
|  | vpxord	%zmm10,%zmm0,%zmm0 | 
|  | vpclmulqdq	$0x00,%zmm30,%zmm0,%zmm7 | 
|  | vpclmulqdq	$0x01,%zmm30,%zmm0,%zmm1 | 
|  | vpclmulqdq	$0x10,%zmm30,%zmm0,%zmm2 | 
|  | vpclmulqdq	$0x11,%zmm30,%zmm0,%zmm3 | 
|  | vpxord	%zmm7,%zmm4,%zmm4 | 
|  | vpternlogd	$0x96,%zmm2,%zmm1,%zmm5 | 
|  | vpxord	%zmm3,%zmm6,%zmm6 | 
|  |  | 
|  |  | 
|  | .Lreduce__func2: | 
|  |  | 
|  | vpclmulqdq	$0x01,%zmm4,%zmm31,%zmm0 | 
|  | vpshufd	$0x4e,%zmm4,%zmm4 | 
|  | vpternlogd	$0x96,%zmm0,%zmm4,%zmm5 | 
|  | vpclmulqdq	$0x01,%zmm5,%zmm31,%zmm0 | 
|  | vpshufd	$0x4e,%zmm5,%zmm5 | 
|  | vpternlogd	$0x96,%zmm0,%zmm5,%zmm6 | 
|  |  | 
|  | vextracti32x4	$1,%zmm6,%xmm0 | 
|  | vextracti32x4	$2,%zmm6,%xmm1 | 
|  | vextracti32x4	$3,%zmm6,%xmm2 | 
|  | vpxord	%xmm0,%xmm6,%xmm10 | 
|  | vpternlogd	$0x96,%xmm1,%xmm2,%xmm10 | 
|  |  | 
|  |  | 
|  | .Ldone__func2: | 
|  |  | 
|  | vpshufb	%xmm8,%xmm10,%xmm10 | 
|  | vmovdqu	%xmm10,(%r12) | 
|  |  | 
|  | vzeroupper | 
|  | popq	%r12 | 
|  | .cfi_adjust_cfa_offset	-8 | 
|  | .cfi_restore	%r12 | 
|  | ret | 
|  |  | 
|  | .cfi_endproc | 
|  | .size	aes_gcm_dec_update_vaes_avx10_512, . - aes_gcm_dec_update_vaes_avx10_512 | 
|  | #endif |