// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
.section __DATA,__const
.p2align 6
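// Constant pool: L$bswap_mask reverses the byte order of each 128-bit lane;
// L$gfpoly and L$gfpoly_and_internal_carrybit hold the GF(2^128) reduction
// constants used by the GHASH folding steps (the latter with an extra carry
// bit used when doubling the hash key); L$ctr_pattern, L$inc_2blocks, and
// L$inc_4blocks supply the lane offsets and per-iteration increments for the
// CTR-mode counter blocks.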
L$bswap_mask:
.quad 0x08090a0b0c0d0e0f, 0x0001020304050607
L$gfpoly:
.quad 1, 0xc200000000000000
L$gfpoly_and_internal_carrybit:
.quad 1, 0xc200000000000001
L$ctr_pattern:
.quad 0, 0
.quad 1, 0
L$inc_2blocks:
.quad 2, 0
.quad 3, 0
L$inc_4blocks:
.quad 4, 0
.text
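// gcm_gmult_vpclmulqdq_avx10: multiplies the 16-byte GHASH accumulator at
// (%rdi) by the hash key (the final entry of the 256-byte key-power table at
// (%rsi)) in GF(2^128), using VPCLMULQDQ followed by a reduction.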
.globl _gcm_gmult_vpclmulqdq_avx10
.private_extern _gcm_gmult_vpclmulqdq_avx10
.p2align 5
_gcm_gmult_vpclmulqdq_avx10:
_CET_ENDBR
vmovdqu (%rdi),%xmm0
vmovdqu L$bswap_mask(%rip),%xmm1
vmovdqu 256-16(%rsi),%xmm2
vmovdqu L$gfpoly(%rip),%xmm3
vpshufb %xmm1,%xmm0,%xmm0
vpclmulqdq $0x00,%xmm2,%xmm0,%xmm4
vpclmulqdq $0x01,%xmm2,%xmm0,%xmm5
vpclmulqdq $0x10,%xmm2,%xmm0,%xmm6
vpxord %xmm6,%xmm5,%xmm5
vpclmulqdq $0x01,%xmm4,%xmm3,%xmm6
vpshufd $0x4e,%xmm4,%xmm4
vpternlogd $0x96,%xmm6,%xmm4,%xmm5
vpclmulqdq $0x11,%xmm2,%xmm0,%xmm0
vpclmulqdq $0x01,%xmm5,%xmm3,%xmm4
vpshufd $0x4e,%xmm5,%xmm5
vpternlogd $0x96,%xmm4,%xmm5,%xmm0
vpshufb %xmm1,%xmm0,%xmm0
vmovdqu %xmm0,(%rdi)
ret
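// gcm_init_vpclmulqdq_avx10: expands the raw hash key at (%rsi) into the
// 256-byte table of key powers at (%rdi) used by the GHASH and AES-GCM
// routines below; the table is filled from its end toward its start.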
.globl _gcm_init_vpclmulqdq_avx10
.private_extern _gcm_init_vpclmulqdq_avx10
.p2align 5
_gcm_init_vpclmulqdq_avx10:
_CET_ENDBR
leaq 256-32(%rdi),%r8
vpshufd $0x4e,(%rsi),%xmm3
vpshufd $0xd3,%xmm3,%xmm0
vpsrad $31,%xmm0,%xmm0
vpaddq %xmm3,%xmm3,%xmm3
vpternlogd $0x78,L$gfpoly_and_internal_carrybit(%rip),%xmm0,%xmm3
vbroadcasti32x4 L$gfpoly(%rip),%ymm5
vpclmulqdq $0x00,%xmm3,%xmm3,%xmm0
vpclmulqdq $0x01,%xmm3,%xmm3,%xmm1
vpclmulqdq $0x10,%xmm3,%xmm3,%xmm2
vpxord %xmm2,%xmm1,%xmm1
vpclmulqdq $0x01,%xmm0,%xmm5,%xmm2
vpshufd $0x4e,%xmm0,%xmm0
vpternlogd $0x96,%xmm2,%xmm0,%xmm1
vpclmulqdq $0x11,%xmm3,%xmm3,%xmm4
vpclmulqdq $0x01,%xmm1,%xmm5,%xmm0
vpshufd $0x4e,%xmm1,%xmm1
vpternlogd $0x96,%xmm0,%xmm1,%xmm4
vinserti128 $1,%xmm3,%ymm4,%ymm3
vinserti128 $1,%xmm4,%ymm4,%ymm4
vmovdqu8 %ymm3,(%r8)
movl $7,%eax
L$precompute_next__func1:
subq $32,%r8
vpclmulqdq $0x00,%ymm4,%ymm3,%ymm0
vpclmulqdq $0x01,%ymm4,%ymm3,%ymm1
vpclmulqdq $0x10,%ymm4,%ymm3,%ymm2
vpxord %ymm2,%ymm1,%ymm1
vpclmulqdq $0x01,%ymm0,%ymm5,%ymm2
vpshufd $0x4e,%ymm0,%ymm0
vpternlogd $0x96,%ymm2,%ymm0,%ymm1
vpclmulqdq $0x11,%ymm4,%ymm3,%ymm3
vpclmulqdq $0x01,%ymm1,%ymm5,%ymm0
vpshufd $0x4e,%ymm1,%ymm1
vpternlogd $0x96,%ymm0,%ymm1,%ymm3
vmovdqu8 %ymm3,(%r8)
decl %eax
jnz L$precompute_next__func1
vzeroupper
ret
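// gcm_ghash_vpclmulqdq_avx10_256: folds the data at (%rdx) (length in %rcx,
// a multiple of 16) into the GHASH accumulator at (%rdi), using the key-power
// table at (%rsi). Large inputs are processed 128 bytes at a time with
// 256-bit vectors, then 32 bytes at a time, with a 16-byte block-by-block
// tail.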
.globl _gcm_ghash_vpclmulqdq_avx10_256
.private_extern _gcm_ghash_vpclmulqdq_avx10_256
.p2align 5
_gcm_ghash_vpclmulqdq_avx10_256:
_CET_ENDBR
vmovdqu L$bswap_mask(%rip),%xmm4
vmovdqu L$gfpoly(%rip),%xmm10
vmovdqu (%rdi),%xmm5
vpshufb %xmm4,%xmm5,%xmm5
cmpq $32,%rcx
jb L$aad_blockbyblock__func1
vshufi64x2 $0,%ymm4,%ymm4,%ymm4
vshufi64x2 $0,%ymm10,%ymm10,%ymm10
vmovdqu8 256-32(%rsi),%ymm9
cmpq $128-1,%rcx
jbe L$aad_loop_1x__func1
vmovdqu8 256-128(%rsi),%ymm6
vmovdqu8 256-96(%rsi),%ymm7
vmovdqu8 256-64(%rsi),%ymm8
L$aad_loop_4x__func1:
vmovdqu8 0(%rdx),%ymm0
vmovdqu8 32(%rdx),%ymm1
vmovdqu8 64(%rdx),%ymm2
vmovdqu8 96(%rdx),%ymm3
vpshufb %ymm4,%ymm0,%ymm0
vpxord %ymm5,%ymm0,%ymm0
vpshufb %ymm4,%ymm1,%ymm1
vpshufb %ymm4,%ymm2,%ymm2
vpshufb %ymm4,%ymm3,%ymm3
vpclmulqdq $0x00,%ymm6,%ymm0,%ymm5
vpclmulqdq $0x00,%ymm7,%ymm1,%ymm11
vpclmulqdq $0x00,%ymm8,%ymm2,%ymm12
vpxord %ymm11,%ymm5,%ymm5
vpclmulqdq $0x00,%ymm9,%ymm3,%ymm13
vpternlogd $0x96,%ymm13,%ymm12,%ymm5
vpclmulqdq $0x01,%ymm6,%ymm0,%ymm11
vpclmulqdq $0x01,%ymm7,%ymm1,%ymm12
vpclmulqdq $0x01,%ymm8,%ymm2,%ymm13
vpternlogd $0x96,%ymm13,%ymm12,%ymm11
vpclmulqdq $0x01,%ymm9,%ymm3,%ymm12
vpclmulqdq $0x10,%ymm6,%ymm0,%ymm13
vpternlogd $0x96,%ymm13,%ymm12,%ymm11
vpclmulqdq $0x10,%ymm7,%ymm1,%ymm12
vpclmulqdq $0x10,%ymm8,%ymm2,%ymm13
vpternlogd $0x96,%ymm13,%ymm12,%ymm11
vpclmulqdq $0x01,%ymm5,%ymm10,%ymm13
vpclmulqdq $0x10,%ymm9,%ymm3,%ymm12
vpxord %ymm12,%ymm11,%ymm11
vpshufd $0x4e,%ymm5,%ymm5
vpclmulqdq $0x11,%ymm6,%ymm0,%ymm0
vpclmulqdq $0x11,%ymm7,%ymm1,%ymm1
vpclmulqdq $0x11,%ymm8,%ymm2,%ymm2
vpternlogd $0x96,%ymm13,%ymm5,%ymm11
vpclmulqdq $0x11,%ymm9,%ymm3,%ymm3
vpternlogd $0x96,%ymm2,%ymm1,%ymm0
vpclmulqdq $0x01,%ymm11,%ymm10,%ymm12
vpxord %ymm3,%ymm0,%ymm5
vpshufd $0x4e,%ymm11,%ymm11
vpternlogd $0x96,%ymm12,%ymm11,%ymm5
vextracti32x4 $1,%ymm5,%xmm0
vpxord %xmm0,%xmm5,%xmm5
subq $-128,%rdx
addq $-128,%rcx
cmpq $128-1,%rcx
ja L$aad_loop_4x__func1
cmpq $32,%rcx
jb L$aad_large_done__func1
L$aad_loop_1x__func1:
vmovdqu8 (%rdx),%ymm0
vpshufb %ymm4,%ymm0,%ymm0
vpxord %ymm0,%ymm5,%ymm5
vpclmulqdq $0x00,%ymm9,%ymm5,%ymm0
vpclmulqdq $0x01,%ymm9,%ymm5,%ymm1
vpclmulqdq $0x10,%ymm9,%ymm5,%ymm2
vpxord %ymm2,%ymm1,%ymm1
vpclmulqdq $0x01,%ymm0,%ymm10,%ymm2
vpshufd $0x4e,%ymm0,%ymm0
vpternlogd $0x96,%ymm2,%ymm0,%ymm1
vpclmulqdq $0x11,%ymm9,%ymm5,%ymm5
vpclmulqdq $0x01,%ymm1,%ymm10,%ymm0
vpshufd $0x4e,%ymm1,%ymm1
vpternlogd $0x96,%ymm0,%ymm1,%ymm5
vextracti32x4 $1,%ymm5,%xmm0
vpxord %xmm0,%xmm5,%xmm5
addq $32,%rdx
subq $32,%rcx
cmpq $32,%rcx
jae L$aad_loop_1x__func1
L$aad_large_done__func1:
vzeroupper
L$aad_blockbyblock__func1:
testq %rcx,%rcx
jz L$aad_done__func1
vmovdqu 256-16(%rsi),%xmm9
L$aad_loop_blockbyblock__func1:
vmovdqu (%rdx),%xmm0
vpshufb %xmm4,%xmm0,%xmm0
vpxor %xmm0,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm9,%xmm5,%xmm0
vpclmulqdq $0x01,%xmm9,%xmm5,%xmm1
vpclmulqdq $0x10,%xmm9,%xmm5,%xmm2
vpxord %xmm2,%xmm1,%xmm1
vpclmulqdq $0x01,%xmm0,%xmm10,%xmm2
vpshufd $0x4e,%xmm0,%xmm0
vpternlogd $0x96,%xmm2,%xmm0,%xmm1
vpclmulqdq $0x11,%xmm9,%xmm5,%xmm5
vpclmulqdq $0x01,%xmm1,%xmm10,%xmm0
vpshufd $0x4e,%xmm1,%xmm1
vpternlogd $0x96,%xmm0,%xmm1,%xmm5
addq $16,%rdx
subq $16,%rcx
jnz L$aad_loop_blockbyblock__func1
L$aad_done__func1:
vpshufb %xmm4,%xmm5,%xmm5
vmovdqu %xmm5,(%rdi)
ret
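// aes_gcm_enc_update_vaes_avx10_256: CTR-mode encrypts %rdx bytes from (%rdi)
// to (%rsi) with the AES key schedule at (%rcx) and the counter block at
// (%r8), folding the resulting ciphertext into the GHASH accumulator pointed
// to by the stack argument, using the key-power table at (%r9). Uses 256-bit
// (ymm) vectors, 128 bytes per main-loop iteration, with the AES rounds and
// GHASH multiplies interleaved in the main loop.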
.globl _aes_gcm_enc_update_vaes_avx10_256
.private_extern _aes_gcm_enc_update_vaes_avx10_256
.p2align 5
_aes_gcm_enc_update_vaes_avx10_256:
_CET_ENDBR
pushq %r12
movq 16(%rsp),%r12
#ifdef BORINGSSL_DISPATCH_TEST
movb $1,_BORINGSSL_function_hit+6(%rip)
#endif
vbroadcasti32x4 L$bswap_mask(%rip),%ymm8
vbroadcasti32x4 L$gfpoly(%rip),%ymm31
vmovdqu (%r12),%xmm10
vpshufb %xmm8,%xmm10,%xmm10
vbroadcasti32x4 (%r8),%ymm12
vpshufb %ymm8,%ymm12,%ymm12
movl 240(%rcx),%r10d
leal -20(,%r10,4),%r10d
leaq 96(%rcx,%r10,4),%r11
vbroadcasti32x4 (%rcx),%ymm13
vbroadcasti32x4 (%r11),%ymm14
vpaddd L$ctr_pattern(%rip),%ymm12,%ymm12
vbroadcasti32x4 L$inc_2blocks(%rip),%ymm11
cmpq $128-1,%rdx
jbe L$crypt_loop_4x_done__func1
vmovdqu8 256-128(%r9),%ymm27
vmovdqu8 256-96(%r9),%ymm28
vmovdqu8 256-64(%r9),%ymm29
vmovdqu8 256-32(%r9),%ymm30
vpshufb %ymm8,%ymm12,%ymm0
vpaddd %ymm11,%ymm12,%ymm12
vpshufb %ymm8,%ymm12,%ymm1
vpaddd %ymm11,%ymm12,%ymm12
vpshufb %ymm8,%ymm12,%ymm2
vpaddd %ymm11,%ymm12,%ymm12
vpshufb %ymm8,%ymm12,%ymm3
vpaddd %ymm11,%ymm12,%ymm12
vpxord %ymm13,%ymm0,%ymm0
vpxord %ymm13,%ymm1,%ymm1
vpxord %ymm13,%ymm2,%ymm2
vpxord %ymm13,%ymm3,%ymm3
leaq 16(%rcx),%rax
L$vaesenc_loop_first_4_vecs__func1:
vbroadcasti32x4 (%rax),%ymm9
vaesenc %ymm9,%ymm0,%ymm0
vaesenc %ymm9,%ymm1,%ymm1
vaesenc %ymm9,%ymm2,%ymm2
vaesenc %ymm9,%ymm3,%ymm3
addq $16,%rax
cmpq %rax,%r11
jne L$vaesenc_loop_first_4_vecs__func1
vpxord 0(%rdi),%ymm14,%ymm4
vpxord 32(%rdi),%ymm14,%ymm5
vpxord 64(%rdi),%ymm14,%ymm6
vpxord 96(%rdi),%ymm14,%ymm7
vaesenclast %ymm4,%ymm0,%ymm4
vaesenclast %ymm5,%ymm1,%ymm5
vaesenclast %ymm6,%ymm2,%ymm6
vaesenclast %ymm7,%ymm3,%ymm7
vmovdqu8 %ymm4,0(%rsi)
vmovdqu8 %ymm5,32(%rsi)
vmovdqu8 %ymm6,64(%rsi)
vmovdqu8 %ymm7,96(%rsi)
subq $-128,%rdi
subq $-128,%rsi
addq $-128,%rdx
cmpq $128-1,%rdx
jbe L$ghash_last_ciphertext_4x__func1
vbroadcasti32x4 -144(%r11),%ymm15
vbroadcasti32x4 -128(%r11),%ymm16
vbroadcasti32x4 -112(%r11),%ymm17
vbroadcasti32x4 -96(%r11),%ymm18
vbroadcasti32x4 -80(%r11),%ymm19
vbroadcasti32x4 -64(%r11),%ymm20
vbroadcasti32x4 -48(%r11),%ymm21
vbroadcasti32x4 -32(%r11),%ymm22
vbroadcasti32x4 -16(%r11),%ymm23
L$crypt_loop_4x__func1:
vpshufb %ymm8,%ymm12,%ymm0
vpaddd %ymm11,%ymm12,%ymm12
vpshufb %ymm8,%ymm12,%ymm1
vpaddd %ymm11,%ymm12,%ymm12
vpshufb %ymm8,%ymm12,%ymm2
vpaddd %ymm11,%ymm12,%ymm12
vpshufb %ymm8,%ymm12,%ymm3
vpaddd %ymm11,%ymm12,%ymm12
vpxord %ymm13,%ymm0,%ymm0
vpxord %ymm13,%ymm1,%ymm1
vpxord %ymm13,%ymm2,%ymm2
vpxord %ymm13,%ymm3,%ymm3
cmpl $24,%r10d
jl L$aes128__func1
je L$aes192__func1
vbroadcasti32x4 -208(%r11),%ymm9
vaesenc %ymm9,%ymm0,%ymm0
vaesenc %ymm9,%ymm1,%ymm1
vaesenc %ymm9,%ymm2,%ymm2
vaesenc %ymm9,%ymm3,%ymm3
vbroadcasti32x4 -192(%r11),%ymm9
vaesenc %ymm9,%ymm0,%ymm0
vaesenc %ymm9,%ymm1,%ymm1
vaesenc %ymm9,%ymm2,%ymm2
vaesenc %ymm9,%ymm3,%ymm3
L$aes192__func1:
vbroadcasti32x4 -176(%r11),%ymm9
vaesenc %ymm9,%ymm0,%ymm0
vaesenc %ymm9,%ymm1,%ymm1
vaesenc %ymm9,%ymm2,%ymm2
vaesenc %ymm9,%ymm3,%ymm3
vbroadcasti32x4 -160(%r11),%ymm9
vaesenc %ymm9,%ymm0,%ymm0
vaesenc %ymm9,%ymm1,%ymm1
vaesenc %ymm9,%ymm2,%ymm2
vaesenc %ymm9,%ymm3,%ymm3
L$aes128__func1:
vpshufb %ymm8,%ymm4,%ymm4
vpxord %ymm10,%ymm4,%ymm4
vpshufb %ymm8,%ymm5,%ymm5
vpshufb %ymm8,%ymm6,%ymm6
vaesenc %ymm15,%ymm0,%ymm0
vaesenc %ymm15,%ymm1,%ymm1
vaesenc %ymm15,%ymm2,%ymm2
vaesenc %ymm15,%ymm3,%ymm3
vpshufb %ymm8,%ymm7,%ymm7
vpclmulqdq $0x00,%ymm27,%ymm4,%ymm10
vpclmulqdq $0x00,%ymm28,%ymm5,%ymm24
vpclmulqdq $0x00,%ymm29,%ymm6,%ymm25
vaesenc %ymm16,%ymm0,%ymm0
vaesenc %ymm16,%ymm1,%ymm1
vaesenc %ymm16,%ymm2,%ymm2
vaesenc %ymm16,%ymm3,%ymm3
vpxord %ymm24,%ymm10,%ymm10
vpclmulqdq $0x00,%ymm30,%ymm7,%ymm26
vpternlogd $0x96,%ymm26,%ymm25,%ymm10
vpclmulqdq $0x01,%ymm27,%ymm4,%ymm24
vaesenc %ymm17,%ymm0,%ymm0
vaesenc %ymm17,%ymm1,%ymm1
vaesenc %ymm17,%ymm2,%ymm2
vaesenc %ymm17,%ymm3,%ymm3
vpclmulqdq $0x01,%ymm28,%ymm5,%ymm25
vpclmulqdq $0x01,%ymm29,%ymm6,%ymm26
vpternlogd $0x96,%ymm26,%ymm25,%ymm24
vpclmulqdq $0x01,%ymm30,%ymm7,%ymm25
vaesenc %ymm18,%ymm0,%ymm0
vaesenc %ymm18,%ymm1,%ymm1
vaesenc %ymm18,%ymm2,%ymm2
vaesenc %ymm18,%ymm3,%ymm3
vpclmulqdq $0x10,%ymm27,%ymm4,%ymm26
vpternlogd $0x96,%ymm26,%ymm25,%ymm24
vpclmulqdq $0x10,%ymm28,%ymm5,%ymm25
vpclmulqdq $0x10,%ymm29,%ymm6,%ymm26
vaesenc %ymm19,%ymm0,%ymm0
vaesenc %ymm19,%ymm1,%ymm1
vaesenc %ymm19,%ymm2,%ymm2
vaesenc %ymm19,%ymm3,%ymm3
vpternlogd $0x96,%ymm26,%ymm25,%ymm24
vpclmulqdq $0x01,%ymm10,%ymm31,%ymm26
vpclmulqdq $0x10,%ymm30,%ymm7,%ymm25
vpxord %ymm25,%ymm24,%ymm24
vaesenc %ymm20,%ymm0,%ymm0
vaesenc %ymm20,%ymm1,%ymm1
vaesenc %ymm20,%ymm2,%ymm2
vaesenc %ymm20,%ymm3,%ymm3
vpshufd $0x4e,%ymm10,%ymm10
vpclmulqdq $0x11,%ymm27,%ymm4,%ymm4
vpclmulqdq $0x11,%ymm28,%ymm5,%ymm5
vpclmulqdq $0x11,%ymm29,%ymm6,%ymm6
vaesenc %ymm21,%ymm0,%ymm0
vaesenc %ymm21,%ymm1,%ymm1
vaesenc %ymm21,%ymm2,%ymm2
vaesenc %ymm21,%ymm3,%ymm3
vpternlogd $0x96,%ymm26,%ymm10,%ymm24
vpclmulqdq $0x11,%ymm30,%ymm7,%ymm7
vpternlogd $0x96,%ymm6,%ymm5,%ymm4
vpclmulqdq $0x01,%ymm24,%ymm31,%ymm25
vaesenc %ymm22,%ymm0,%ymm0
vaesenc %ymm22,%ymm1,%ymm1
vaesenc %ymm22,%ymm2,%ymm2
vaesenc %ymm22,%ymm3,%ymm3
vpxord %ymm7,%ymm4,%ymm10
vpshufd $0x4e,%ymm24,%ymm24
vpternlogd $0x96,%ymm25,%ymm24,%ymm10
vaesenc %ymm23,%ymm0,%ymm0
vaesenc %ymm23,%ymm1,%ymm1
vaesenc %ymm23,%ymm2,%ymm2
vaesenc %ymm23,%ymm3,%ymm3
vextracti32x4 $1,%ymm10,%xmm4
vpxord %xmm4,%xmm10,%xmm10
vpxord 0(%rdi),%ymm14,%ymm4
vpxord 32(%rdi),%ymm14,%ymm5
vpxord 64(%rdi),%ymm14,%ymm6
vpxord 96(%rdi),%ymm14,%ymm7
vaesenclast %ymm4,%ymm0,%ymm4
vaesenclast %ymm5,%ymm1,%ymm5
vaesenclast %ymm6,%ymm2,%ymm6
vaesenclast %ymm7,%ymm3,%ymm7
vmovdqu8 %ymm4,0(%rsi)
vmovdqu8 %ymm5,32(%rsi)
vmovdqu8 %ymm6,64(%rsi)
vmovdqu8 %ymm7,96(%rsi)
subq $-128,%rdi
subq $-128,%rsi
addq $-128,%rdx
cmpq $128-1,%rdx
ja L$crypt_loop_4x__func1
L$ghash_last_ciphertext_4x__func1:
vpshufb %ymm8,%ymm4,%ymm4
vpxord %ymm10,%ymm4,%ymm4
vpshufb %ymm8,%ymm5,%ymm5
vpshufb %ymm8,%ymm6,%ymm6
vpshufb %ymm8,%ymm7,%ymm7
vpclmulqdq $0x00,%ymm27,%ymm4,%ymm10
vpclmulqdq $0x00,%ymm28,%ymm5,%ymm24
vpclmulqdq $0x00,%ymm29,%ymm6,%ymm25
vpxord %ymm24,%ymm10,%ymm10
vpclmulqdq $0x00,%ymm30,%ymm7,%ymm26
vpternlogd $0x96,%ymm26,%ymm25,%ymm10
vpclmulqdq $0x01,%ymm27,%ymm4,%ymm24
vpclmulqdq $0x01,%ymm28,%ymm5,%ymm25
vpclmulqdq $0x01,%ymm29,%ymm6,%ymm26
vpternlogd $0x96,%ymm26,%ymm25,%ymm24
vpclmulqdq $0x01,%ymm30,%ymm7,%ymm25
vpclmulqdq $0x10,%ymm27,%ymm4,%ymm26
vpternlogd $0x96,%ymm26,%ymm25,%ymm24
vpclmulqdq $0x10,%ymm28,%ymm5,%ymm25
vpclmulqdq $0x10,%ymm29,%ymm6,%ymm26
vpternlogd $0x96,%ymm26,%ymm25,%ymm24
vpclmulqdq $0x01,%ymm10,%ymm31,%ymm26
vpclmulqdq $0x10,%ymm30,%ymm7,%ymm25
vpxord %ymm25,%ymm24,%ymm24
vpshufd $0x4e,%ymm10,%ymm10
vpclmulqdq $0x11,%ymm27,%ymm4,%ymm4
vpclmulqdq $0x11,%ymm28,%ymm5,%ymm5
vpclmulqdq $0x11,%ymm29,%ymm6,%ymm6
vpternlogd $0x96,%ymm26,%ymm10,%ymm24
vpclmulqdq $0x11,%ymm30,%ymm7,%ymm7
vpternlogd $0x96,%ymm6,%ymm5,%ymm4
vpclmulqdq $0x01,%ymm24,%ymm31,%ymm25
vpxord %ymm7,%ymm4,%ymm10
vpshufd $0x4e,%ymm24,%ymm24
vpternlogd $0x96,%ymm25,%ymm24,%ymm10
vextracti32x4 $1,%ymm10,%xmm4
vpxord %xmm4,%xmm10,%xmm10
L$crypt_loop_4x_done__func1:
testq %rdx,%rdx
jz L$done__func1
movq %rdx,%rax
negq %rax
andq $-16,%rax
leaq 256(%r9,%rax,1),%r8
vpxor %xmm4,%xmm4,%xmm4
vpxor %xmm5,%xmm5,%xmm5
vpxor %xmm6,%xmm6,%xmm6
cmpq $32,%rdx
jb L$partial_vec__func1
L$crypt_loop_1x__func1:
vpshufb %ymm8,%ymm12,%ymm0
vpaddd %ymm11,%ymm12,%ymm12
vpxord %ymm13,%ymm0,%ymm0
leaq 16(%rcx),%rax
L$vaesenc_loop_tail_full_vec__func1:
vbroadcasti32x4 (%rax),%ymm9
vaesenc %ymm9,%ymm0,%ymm0
addq $16,%rax
cmpq %rax,%r11
jne L$vaesenc_loop_tail_full_vec__func1
vaesenclast %ymm14,%ymm0,%ymm0
vmovdqu8 (%rdi),%ymm1
vpxord %ymm1,%ymm0,%ymm0
vmovdqu8 %ymm0,(%rsi)
vmovdqu8 (%r8),%ymm30
vpshufb %ymm8,%ymm0,%ymm0
vpxord %ymm10,%ymm0,%ymm0
vpclmulqdq $0x00,%ymm30,%ymm0,%ymm7
vpclmulqdq $0x01,%ymm30,%ymm0,%ymm1
vpclmulqdq $0x10,%ymm30,%ymm0,%ymm2
vpclmulqdq $0x11,%ymm30,%ymm0,%ymm3
vpxord %ymm7,%ymm4,%ymm4
vpternlogd $0x96,%ymm2,%ymm1,%ymm5
vpxord %ymm3,%ymm6,%ymm6
vpxor %xmm10,%xmm10,%xmm10
addq $32,%r8
addq $32,%rdi
addq $32,%rsi
subq $32,%rdx
cmpq $32,%rdx
jae L$crypt_loop_1x__func1
testq %rdx,%rdx
jz L$reduce__func1
L$partial_vec__func1:
movq $-1,%rax
bzhiq %rdx,%rax,%rax
kmovd %eax,%k1
addq $15,%rdx
andq $-16,%rdx
movq $-1,%rax
bzhiq %rdx,%rax,%rax
kmovd %eax,%k2
vpshufb %ymm8,%ymm12,%ymm0
vpxord %ymm13,%ymm0,%ymm0
leaq 16(%rcx),%rax
L$vaesenc_loop_tail_partialvec__func1:
vbroadcasti32x4 (%rax),%ymm9
vaesenc %ymm9,%ymm0,%ymm0
addq $16,%rax
cmpq %rax,%r11
jne L$vaesenc_loop_tail_partialvec__func1
vaesenclast %ymm14,%ymm0,%ymm0
vmovdqu8 (%rdi),%ymm1{%k1}{z}
vpxord %ymm1,%ymm0,%ymm0
vmovdqu8 %ymm0,(%rsi){%k1}
vmovdqu8 (%r8),%ymm30{%k2}{z}
vmovdqu8 %ymm0,%ymm1{%k1}{z}
vpshufb %ymm8,%ymm1,%ymm0
vpxord %ymm10,%ymm0,%ymm0
vpclmulqdq $0x00,%ymm30,%ymm0,%ymm7
vpclmulqdq $0x01,%ymm30,%ymm0,%ymm1
vpclmulqdq $0x10,%ymm30,%ymm0,%ymm2
vpclmulqdq $0x11,%ymm30,%ymm0,%ymm3
vpxord %ymm7,%ymm4,%ymm4
vpternlogd $0x96,%ymm2,%ymm1,%ymm5
vpxord %ymm3,%ymm6,%ymm6
L$reduce__func1:
vpclmulqdq $0x01,%ymm4,%ymm31,%ymm0
vpshufd $0x4e,%ymm4,%ymm4
vpternlogd $0x96,%ymm0,%ymm4,%ymm5
vpclmulqdq $0x01,%ymm5,%ymm31,%ymm0
vpshufd $0x4e,%ymm5,%ymm5
vpternlogd $0x96,%ymm0,%ymm5,%ymm6
vextracti32x4 $1,%ymm6,%xmm0
vpxord %xmm0,%xmm6,%xmm10
L$done__func1:
vpshufb %xmm8,%xmm10,%xmm10
vmovdqu %xmm10,(%r12)
vzeroupper
popq %r12
ret
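// aes_gcm_dec_update_vaes_avx10_256: the decryption counterpart of the
// function above; the CTR-mode keystream generation is identical, but GHASH
// is computed over the input ciphertext at (%rdi) rather than over the
// output.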
.globl _aes_gcm_dec_update_vaes_avx10_256
.private_extern _aes_gcm_dec_update_vaes_avx10_256
.p2align 5
_aes_gcm_dec_update_vaes_avx10_256:
_CET_ENDBR
pushq %r12
movq 16(%rsp),%r12
vbroadcasti32x4 L$bswap_mask(%rip),%ymm8
vbroadcasti32x4 L$gfpoly(%rip),%ymm31
vmovdqu (%r12),%xmm10
vpshufb %xmm8,%xmm10,%xmm10
vbroadcasti32x4 (%r8),%ymm12
vpshufb %ymm8,%ymm12,%ymm12
movl 240(%rcx),%r10d
leal -20(,%r10,4),%r10d
leaq 96(%rcx,%r10,4),%r11
vbroadcasti32x4 (%rcx),%ymm13
vbroadcasti32x4 (%r11),%ymm14
vpaddd L$ctr_pattern(%rip),%ymm12,%ymm12
vbroadcasti32x4 L$inc_2blocks(%rip),%ymm11
cmpq $128-1,%rdx
jbe L$crypt_loop_4x_done__func2
vmovdqu8 256-128(%r9),%ymm27
vmovdqu8 256-96(%r9),%ymm28
vmovdqu8 256-64(%r9),%ymm29
vmovdqu8 256-32(%r9),%ymm30
vbroadcasti32x4 -144(%r11),%ymm15
vbroadcasti32x4 -128(%r11),%ymm16
vbroadcasti32x4 -112(%r11),%ymm17
vbroadcasti32x4 -96(%r11),%ymm18
vbroadcasti32x4 -80(%r11),%ymm19
vbroadcasti32x4 -64(%r11),%ymm20
vbroadcasti32x4 -48(%r11),%ymm21
vbroadcasti32x4 -32(%r11),%ymm22
vbroadcasti32x4 -16(%r11),%ymm23
L$crypt_loop_4x__func2:
vmovdqu8 0(%rdi),%ymm4
vmovdqu8 32(%rdi),%ymm5
vmovdqu8 64(%rdi),%ymm6
vmovdqu8 96(%rdi),%ymm7
vpshufb %ymm8,%ymm12,%ymm0
vpaddd %ymm11,%ymm12,%ymm12
vpshufb %ymm8,%ymm12,%ymm1
vpaddd %ymm11,%ymm12,%ymm12
vpshufb %ymm8,%ymm12,%ymm2
vpaddd %ymm11,%ymm12,%ymm12
vpshufb %ymm8,%ymm12,%ymm3
vpaddd %ymm11,%ymm12,%ymm12
vpxord %ymm13,%ymm0,%ymm0
vpxord %ymm13,%ymm1,%ymm1
vpxord %ymm13,%ymm2,%ymm2
vpxord %ymm13,%ymm3,%ymm3
cmpl $24,%r10d
jl L$aes128__func2
je L$aes192__func2
vbroadcasti32x4 -208(%r11),%ymm9
vaesenc %ymm9,%ymm0,%ymm0
vaesenc %ymm9,%ymm1,%ymm1
vaesenc %ymm9,%ymm2,%ymm2
vaesenc %ymm9,%ymm3,%ymm3
vbroadcasti32x4 -192(%r11),%ymm9
vaesenc %ymm9,%ymm0,%ymm0
vaesenc %ymm9,%ymm1,%ymm1
vaesenc %ymm9,%ymm2,%ymm2
vaesenc %ymm9,%ymm3,%ymm3
L$aes192__func2:
vbroadcasti32x4 -176(%r11),%ymm9
vaesenc %ymm9,%ymm0,%ymm0
vaesenc %ymm9,%ymm1,%ymm1
vaesenc %ymm9,%ymm2,%ymm2
vaesenc %ymm9,%ymm3,%ymm3
vbroadcasti32x4 -160(%r11),%ymm9
vaesenc %ymm9,%ymm0,%ymm0
vaesenc %ymm9,%ymm1,%ymm1
vaesenc %ymm9,%ymm2,%ymm2
vaesenc %ymm9,%ymm3,%ymm3
L$aes128__func2:
vpshufb %ymm8,%ymm4,%ymm4
vpxord %ymm10,%ymm4,%ymm4
vpshufb %ymm8,%ymm5,%ymm5
vpshufb %ymm8,%ymm6,%ymm6
vaesenc %ymm15,%ymm0,%ymm0
vaesenc %ymm15,%ymm1,%ymm1
vaesenc %ymm15,%ymm2,%ymm2
vaesenc %ymm15,%ymm3,%ymm3
vpshufb %ymm8,%ymm7,%ymm7
vpclmulqdq $0x00,%ymm27,%ymm4,%ymm10
vpclmulqdq $0x00,%ymm28,%ymm5,%ymm24
vpclmulqdq $0x00,%ymm29,%ymm6,%ymm25
vaesenc %ymm16,%ymm0,%ymm0
vaesenc %ymm16,%ymm1,%ymm1
vaesenc %ymm16,%ymm2,%ymm2
vaesenc %ymm16,%ymm3,%ymm3
vpxord %ymm24,%ymm10,%ymm10
vpclmulqdq $0x00,%ymm30,%ymm7,%ymm26
vpternlogd $0x96,%ymm26,%ymm25,%ymm10
vpclmulqdq $0x01,%ymm27,%ymm4,%ymm24
vaesenc %ymm17,%ymm0,%ymm0
vaesenc %ymm17,%ymm1,%ymm1
vaesenc %ymm17,%ymm2,%ymm2
vaesenc %ymm17,%ymm3,%ymm3
vpclmulqdq $0x01,%ymm28,%ymm5,%ymm25
vpclmulqdq $0x01,%ymm29,%ymm6,%ymm26
vpternlogd $0x96,%ymm26,%ymm25,%ymm24
vpclmulqdq $0x01,%ymm30,%ymm7,%ymm25
vaesenc %ymm18,%ymm0,%ymm0
vaesenc %ymm18,%ymm1,%ymm1
vaesenc %ymm18,%ymm2,%ymm2
vaesenc %ymm18,%ymm3,%ymm3
vpclmulqdq $0x10,%ymm27,%ymm4,%ymm26
vpternlogd $0x96,%ymm26,%ymm25,%ymm24
vpclmulqdq $0x10,%ymm28,%ymm5,%ymm25
vpclmulqdq $0x10,%ymm29,%ymm6,%ymm26
vaesenc %ymm19,%ymm0,%ymm0
vaesenc %ymm19,%ymm1,%ymm1
vaesenc %ymm19,%ymm2,%ymm2
vaesenc %ymm19,%ymm3,%ymm3
vpternlogd $0x96,%ymm26,%ymm25,%ymm24
vpclmulqdq $0x01,%ymm10,%ymm31,%ymm26
vpclmulqdq $0x10,%ymm30,%ymm7,%ymm25
vpxord %ymm25,%ymm24,%ymm24
vaesenc %ymm20,%ymm0,%ymm0
vaesenc %ymm20,%ymm1,%ymm1
vaesenc %ymm20,%ymm2,%ymm2
vaesenc %ymm20,%ymm3,%ymm3
vpshufd $0x4e,%ymm10,%ymm10
vpclmulqdq $0x11,%ymm27,%ymm4,%ymm4
vpclmulqdq $0x11,%ymm28,%ymm5,%ymm5
vpclmulqdq $0x11,%ymm29,%ymm6,%ymm6
vaesenc %ymm21,%ymm0,%ymm0
vaesenc %ymm21,%ymm1,%ymm1
vaesenc %ymm21,%ymm2,%ymm2
vaesenc %ymm21,%ymm3,%ymm3
vpternlogd $0x96,%ymm26,%ymm10,%ymm24
vpclmulqdq $0x11,%ymm30,%ymm7,%ymm7
vpternlogd $0x96,%ymm6,%ymm5,%ymm4
vpclmulqdq $0x01,%ymm24,%ymm31,%ymm25
vaesenc %ymm22,%ymm0,%ymm0
vaesenc %ymm22,%ymm1,%ymm1
vaesenc %ymm22,%ymm2,%ymm2
vaesenc %ymm22,%ymm3,%ymm3
vpxord %ymm7,%ymm4,%ymm10
vpshufd $0x4e,%ymm24,%ymm24
vpternlogd $0x96,%ymm25,%ymm24,%ymm10
vaesenc %ymm23,%ymm0,%ymm0
vaesenc %ymm23,%ymm1,%ymm1
vaesenc %ymm23,%ymm2,%ymm2
vaesenc %ymm23,%ymm3,%ymm3
vextracti32x4 $1,%ymm10,%xmm4
vpxord %xmm4,%xmm10,%xmm10
vpxord 0(%rdi),%ymm14,%ymm4
vpxord 32(%rdi),%ymm14,%ymm5
vpxord 64(%rdi),%ymm14,%ymm6
vpxord 96(%rdi),%ymm14,%ymm7
vaesenclast %ymm4,%ymm0,%ymm4
vaesenclast %ymm5,%ymm1,%ymm5
vaesenclast %ymm6,%ymm2,%ymm6
vaesenclast %ymm7,%ymm3,%ymm7
vmovdqu8 %ymm4,0(%rsi)
vmovdqu8 %ymm5,32(%rsi)
vmovdqu8 %ymm6,64(%rsi)
vmovdqu8 %ymm7,96(%rsi)
subq $-128,%rdi
subq $-128,%rsi
addq $-128,%rdx
cmpq $128-1,%rdx
ja L$crypt_loop_4x__func2
L$crypt_loop_4x_done__func2:
testq %rdx,%rdx
jz L$done__func2
movq %rdx,%rax
negq %rax
andq $-16,%rax
leaq 256(%r9,%rax,1),%r8
vpxor %xmm4,%xmm4,%xmm4
vpxor %xmm5,%xmm5,%xmm5
vpxor %xmm6,%xmm6,%xmm6
cmpq $32,%rdx
jb L$partial_vec__func2
L$crypt_loop_1x__func2:
vpshufb %ymm8,%ymm12,%ymm0
vpaddd %ymm11,%ymm12,%ymm12
vpxord %ymm13,%ymm0,%ymm0
leaq 16(%rcx),%rax
L$vaesenc_loop_tail_full_vec__func2:
vbroadcasti32x4 (%rax),%ymm9
vaesenc %ymm9,%ymm0,%ymm0
addq $16,%rax
cmpq %rax,%r11
jne L$vaesenc_loop_tail_full_vec__func2
vaesenclast %ymm14,%ymm0,%ymm0
vmovdqu8 (%rdi),%ymm1
vpxord %ymm1,%ymm0,%ymm0
vmovdqu8 %ymm0,(%rsi)
vmovdqu8 (%r8),%ymm30
vpshufb %ymm8,%ymm1,%ymm0
vpxord %ymm10,%ymm0,%ymm0
vpclmulqdq $0x00,%ymm30,%ymm0,%ymm7
vpclmulqdq $0x01,%ymm30,%ymm0,%ymm1
vpclmulqdq $0x10,%ymm30,%ymm0,%ymm2
vpclmulqdq $0x11,%ymm30,%ymm0,%ymm3
vpxord %ymm7,%ymm4,%ymm4
vpternlogd $0x96,%ymm2,%ymm1,%ymm5
vpxord %ymm3,%ymm6,%ymm6
vpxor %xmm10,%xmm10,%xmm10
addq $32,%r8
addq $32,%rdi
addq $32,%rsi
subq $32,%rdx
cmpq $32,%rdx
jae L$crypt_loop_1x__func2
testq %rdx,%rdx
jz L$reduce__func2
L$partial_vec__func2:
movq $-1,%rax
bzhiq %rdx,%rax,%rax
kmovd %eax,%k1
addq $15,%rdx
andq $-16,%rdx
movq $-1,%rax
bzhiq %rdx,%rax,%rax
kmovd %eax,%k2
vpshufb %ymm8,%ymm12,%ymm0
vpxord %ymm13,%ymm0,%ymm0
leaq 16(%rcx),%rax
L$vaesenc_loop_tail_partialvec__func2:
vbroadcasti32x4 (%rax),%ymm9
vaesenc %ymm9,%ymm0,%ymm0
addq $16,%rax
cmpq %rax,%r11
jne L$vaesenc_loop_tail_partialvec__func2
vaesenclast %ymm14,%ymm0,%ymm0
vmovdqu8 (%rdi),%ymm1{%k1}{z}
vpxord %ymm1,%ymm0,%ymm0
vmovdqu8 %ymm0,(%rsi){%k1}
vmovdqu8 (%r8),%ymm30{%k2}{z}
vpshufb %ymm8,%ymm1,%ymm0
vpxord %ymm10,%ymm0,%ymm0
vpclmulqdq $0x00,%ymm30,%ymm0,%ymm7
vpclmulqdq $0x01,%ymm30,%ymm0,%ymm1
vpclmulqdq $0x10,%ymm30,%ymm0,%ymm2
vpclmulqdq $0x11,%ymm30,%ymm0,%ymm3
vpxord %ymm7,%ymm4,%ymm4
vpternlogd $0x96,%ymm2,%ymm1,%ymm5
vpxord %ymm3,%ymm6,%ymm6
L$reduce__func2:
vpclmulqdq $0x01,%ymm4,%ymm31,%ymm0
vpshufd $0x4e,%ymm4,%ymm4
vpternlogd $0x96,%ymm0,%ymm4,%ymm5
vpclmulqdq $0x01,%ymm5,%ymm31,%ymm0
vpshufd $0x4e,%ymm5,%ymm5
vpternlogd $0x96,%ymm0,%ymm5,%ymm6
vextracti32x4 $1,%ymm6,%xmm0
vpxord %xmm0,%xmm6,%xmm10
L$done__func2:
vpshufb %xmm8,%xmm10,%xmm10
vmovdqu %xmm10,(%r12)
vzeroupper
popq %r12
ret
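// gcm_ghash_vpclmulqdq_avx10_512: same as gcm_ghash_vpclmulqdq_avx10_256
// above, but the wide paths use 512-bit (zmm) vectors and process 256 bytes
// per main-loop iteration.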
.globl _gcm_ghash_vpclmulqdq_avx10_512
.private_extern _gcm_ghash_vpclmulqdq_avx10_512
.p2align 5
_gcm_ghash_vpclmulqdq_avx10_512:
_CET_ENDBR
vmovdqu L$bswap_mask(%rip),%xmm4
vmovdqu L$gfpoly(%rip),%xmm10
vmovdqu (%rdi),%xmm5
vpshufb %xmm4,%xmm5,%xmm5
cmpq $64,%rcx
jb L$aad_blockbyblock__func2
vshufi64x2 $0,%zmm4,%zmm4,%zmm4
vshufi64x2 $0,%zmm10,%zmm10,%zmm10
vmovdqu8 256-64(%rsi),%zmm9
cmpq $256-1,%rcx
jbe L$aad_loop_1x__func2
vmovdqu8 256-256(%rsi),%zmm6
vmovdqu8 256-192(%rsi),%zmm7
vmovdqu8 256-128(%rsi),%zmm8
L$aad_loop_4x__func2:
vmovdqu8 0(%rdx),%zmm0
vmovdqu8 64(%rdx),%zmm1
vmovdqu8 128(%rdx),%zmm2
vmovdqu8 192(%rdx),%zmm3
vpshufb %zmm4,%zmm0,%zmm0
vpxord %zmm5,%zmm0,%zmm0
vpshufb %zmm4,%zmm1,%zmm1
vpshufb %zmm4,%zmm2,%zmm2
vpshufb %zmm4,%zmm3,%zmm3
vpclmulqdq $0x00,%zmm6,%zmm0,%zmm5
vpclmulqdq $0x00,%zmm7,%zmm1,%zmm11
vpclmulqdq $0x00,%zmm8,%zmm2,%zmm12
vpxord %zmm11,%zmm5,%zmm5
vpclmulqdq $0x00,%zmm9,%zmm3,%zmm13
vpternlogd $0x96,%zmm13,%zmm12,%zmm5
vpclmulqdq $0x01,%zmm6,%zmm0,%zmm11
vpclmulqdq $0x01,%zmm7,%zmm1,%zmm12
vpclmulqdq $0x01,%zmm8,%zmm2,%zmm13
vpternlogd $0x96,%zmm13,%zmm12,%zmm11
vpclmulqdq $0x01,%zmm9,%zmm3,%zmm12
vpclmulqdq $0x10,%zmm6,%zmm0,%zmm13
vpternlogd $0x96,%zmm13,%zmm12,%zmm11
vpclmulqdq $0x10,%zmm7,%zmm1,%zmm12
vpclmulqdq $0x10,%zmm8,%zmm2,%zmm13
vpternlogd $0x96,%zmm13,%zmm12,%zmm11
vpclmulqdq $0x01,%zmm5,%zmm10,%zmm13
vpclmulqdq $0x10,%zmm9,%zmm3,%zmm12
vpxord %zmm12,%zmm11,%zmm11
vpshufd $0x4e,%zmm5,%zmm5
vpclmulqdq $0x11,%zmm6,%zmm0,%zmm0
vpclmulqdq $0x11,%zmm7,%zmm1,%zmm1
vpclmulqdq $0x11,%zmm8,%zmm2,%zmm2
vpternlogd $0x96,%zmm13,%zmm5,%zmm11
vpclmulqdq $0x11,%zmm9,%zmm3,%zmm3
vpternlogd $0x96,%zmm2,%zmm1,%zmm0
vpclmulqdq $0x01,%zmm11,%zmm10,%zmm12
vpxord %zmm3,%zmm0,%zmm5
vpshufd $0x4e,%zmm11,%zmm11
vpternlogd $0x96,%zmm12,%zmm11,%zmm5
vextracti32x4 $1,%zmm5,%xmm0
vextracti32x4 $2,%zmm5,%xmm1
vextracti32x4 $3,%zmm5,%xmm2
vpxord %xmm0,%xmm5,%xmm5
vpternlogd $0x96,%xmm1,%xmm2,%xmm5
subq $-256,%rdx
addq $-256,%rcx
cmpq $256-1,%rcx
ja L$aad_loop_4x__func2
cmpq $64,%rcx
jb L$aad_large_done__func2
L$aad_loop_1x__func2:
vmovdqu8 (%rdx),%zmm0
vpshufb %zmm4,%zmm0,%zmm0
vpxord %zmm0,%zmm5,%zmm5
vpclmulqdq $0x00,%zmm9,%zmm5,%zmm0
vpclmulqdq $0x01,%zmm9,%zmm5,%zmm1
vpclmulqdq $0x10,%zmm9,%zmm5,%zmm2
vpxord %zmm2,%zmm1,%zmm1
vpclmulqdq $0x01,%zmm0,%zmm10,%zmm2
vpshufd $0x4e,%zmm0,%zmm0
vpternlogd $0x96,%zmm2,%zmm0,%zmm1
vpclmulqdq $0x11,%zmm9,%zmm5,%zmm5
vpclmulqdq $0x01,%zmm1,%zmm10,%zmm0
vpshufd $0x4e,%zmm1,%zmm1
vpternlogd $0x96,%zmm0,%zmm1,%zmm5
vextracti32x4 $1,%zmm5,%xmm0
vextracti32x4 $2,%zmm5,%xmm1
vextracti32x4 $3,%zmm5,%xmm2
vpxord %xmm0,%xmm5,%xmm5
vpternlogd $0x96,%xmm1,%xmm2,%xmm5
addq $64,%rdx
subq $64,%rcx
cmpq $64,%rcx
jae L$aad_loop_1x__func2
L$aad_large_done__func2:
vzeroupper
L$aad_blockbyblock__func2:
testq %rcx,%rcx
jz L$aad_done__func2
vmovdqu 256-16(%rsi),%xmm9
L$aad_loop_blockbyblock__func2:
vmovdqu (%rdx),%xmm0
vpshufb %xmm4,%xmm0,%xmm0
vpxor %xmm0,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm9,%xmm5,%xmm0
vpclmulqdq $0x01,%xmm9,%xmm5,%xmm1
vpclmulqdq $0x10,%xmm9,%xmm5,%xmm2
vpxord %xmm2,%xmm1,%xmm1
vpclmulqdq $0x01,%xmm0,%xmm10,%xmm2
vpshufd $0x4e,%xmm0,%xmm0
vpternlogd $0x96,%xmm2,%xmm0,%xmm1
vpclmulqdq $0x11,%xmm9,%xmm5,%xmm5
vpclmulqdq $0x01,%xmm1,%xmm10,%xmm0
vpshufd $0x4e,%xmm1,%xmm1
vpternlogd $0x96,%xmm0,%xmm1,%xmm5
addq $16,%rdx
subq $16,%rcx
jnz L$aad_loop_blockbyblock__func2
L$aad_done__func2:
vpshufb %xmm4,%xmm5,%xmm5
vmovdqu %xmm5,(%rdi)
ret
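// aes_gcm_enc_update_vaes_avx10_512: same as aes_gcm_enc_update_vaes_avx10_256
// above, but with 512-bit (zmm) vectors, processing 256 bytes per main-loop
// iteration.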
.globl _aes_gcm_enc_update_vaes_avx10_512
.private_extern _aes_gcm_enc_update_vaes_avx10_512
.p2align 5
_aes_gcm_enc_update_vaes_avx10_512:
_CET_ENDBR
pushq %r12
movq 16(%rsp),%r12
#ifdef BORINGSSL_DISPATCH_TEST
movb $1,_BORINGSSL_function_hit+7(%rip)
#endif
vbroadcasti32x4 L$bswap_mask(%rip),%zmm8
vbroadcasti32x4 L$gfpoly(%rip),%zmm31
vmovdqu (%r12),%xmm10
vpshufb %xmm8,%xmm10,%xmm10
vbroadcasti32x4 (%r8),%zmm12
vpshufb %zmm8,%zmm12,%zmm12
movl 240(%rcx),%r10d
leal -20(,%r10,4),%r10d
leaq 96(%rcx,%r10,4),%r11
vbroadcasti32x4 (%rcx),%zmm13
vbroadcasti32x4 (%r11),%zmm14
vpaddd L$ctr_pattern(%rip),%zmm12,%zmm12
vbroadcasti32x4 L$inc_4blocks(%rip),%zmm11
cmpq $256-1,%rdx
jbe L$crypt_loop_4x_done__func3
vmovdqu8 256-256(%r9),%zmm27
vmovdqu8 256-192(%r9),%zmm28
vmovdqu8 256-128(%r9),%zmm29
vmovdqu8 256-64(%r9),%zmm30
vpshufb %zmm8,%zmm12,%zmm0
vpaddd %zmm11,%zmm12,%zmm12
vpshufb %zmm8,%zmm12,%zmm1
vpaddd %zmm11,%zmm12,%zmm12
vpshufb %zmm8,%zmm12,%zmm2
vpaddd %zmm11,%zmm12,%zmm12
vpshufb %zmm8,%zmm12,%zmm3
vpaddd %zmm11,%zmm12,%zmm12
vpxord %zmm13,%zmm0,%zmm0
vpxord %zmm13,%zmm1,%zmm1
vpxord %zmm13,%zmm2,%zmm2
vpxord %zmm13,%zmm3,%zmm3
leaq 16(%rcx),%rax
L$vaesenc_loop_first_4_vecs__func3:
vbroadcasti32x4 (%rax),%zmm9
vaesenc %zmm9,%zmm0,%zmm0
vaesenc %zmm9,%zmm1,%zmm1
vaesenc %zmm9,%zmm2,%zmm2
vaesenc %zmm9,%zmm3,%zmm3
addq $16,%rax
cmpq %rax,%r11
jne L$vaesenc_loop_first_4_vecs__func3
vpxord 0(%rdi),%zmm14,%zmm4
vpxord 64(%rdi),%zmm14,%zmm5
vpxord 128(%rdi),%zmm14,%zmm6
vpxord 192(%rdi),%zmm14,%zmm7
vaesenclast %zmm4,%zmm0,%zmm4
vaesenclast %zmm5,%zmm1,%zmm5
vaesenclast %zmm6,%zmm2,%zmm6
vaesenclast %zmm7,%zmm3,%zmm7
vmovdqu8 %zmm4,0(%rsi)
vmovdqu8 %zmm5,64(%rsi)
vmovdqu8 %zmm6,128(%rsi)
vmovdqu8 %zmm7,192(%rsi)
subq $-256,%rdi
subq $-256,%rsi
addq $-256,%rdx
cmpq $256-1,%rdx
jbe L$ghash_last_ciphertext_4x__func3
vbroadcasti32x4 -144(%r11),%zmm15
vbroadcasti32x4 -128(%r11),%zmm16
vbroadcasti32x4 -112(%r11),%zmm17
vbroadcasti32x4 -96(%r11),%zmm18
vbroadcasti32x4 -80(%r11),%zmm19
vbroadcasti32x4 -64(%r11),%zmm20
vbroadcasti32x4 -48(%r11),%zmm21
vbroadcasti32x4 -32(%r11),%zmm22
vbroadcasti32x4 -16(%r11),%zmm23
L$crypt_loop_4x__func3:
vpshufb %zmm8,%zmm12,%zmm0
vpaddd %zmm11,%zmm12,%zmm12
vpshufb %zmm8,%zmm12,%zmm1
vpaddd %zmm11,%zmm12,%zmm12
vpshufb %zmm8,%zmm12,%zmm2
vpaddd %zmm11,%zmm12,%zmm12
vpshufb %zmm8,%zmm12,%zmm3
vpaddd %zmm11,%zmm12,%zmm12
vpxord %zmm13,%zmm0,%zmm0
vpxord %zmm13,%zmm1,%zmm1
vpxord %zmm13,%zmm2,%zmm2
vpxord %zmm13,%zmm3,%zmm3
cmpl $24,%r10d
jl L$aes128__func3
je L$aes192__func3
vbroadcasti32x4 -208(%r11),%zmm9
vaesenc %zmm9,%zmm0,%zmm0
vaesenc %zmm9,%zmm1,%zmm1
vaesenc %zmm9,%zmm2,%zmm2
vaesenc %zmm9,%zmm3,%zmm3
vbroadcasti32x4 -192(%r11),%zmm9
vaesenc %zmm9,%zmm0,%zmm0
vaesenc %zmm9,%zmm1,%zmm1
vaesenc %zmm9,%zmm2,%zmm2
vaesenc %zmm9,%zmm3,%zmm3
L$aes192__func3:
vbroadcasti32x4 -176(%r11),%zmm9
vaesenc %zmm9,%zmm0,%zmm0
vaesenc %zmm9,%zmm1,%zmm1
vaesenc %zmm9,%zmm2,%zmm2
vaesenc %zmm9,%zmm3,%zmm3
vbroadcasti32x4 -160(%r11),%zmm9
vaesenc %zmm9,%zmm0,%zmm0
vaesenc %zmm9,%zmm1,%zmm1
vaesenc %zmm9,%zmm2,%zmm2
vaesenc %zmm9,%zmm3,%zmm3
L$aes128__func3:
vpshufb %zmm8,%zmm4,%zmm4
vpxord %zmm10,%zmm4,%zmm4
vpshufb %zmm8,%zmm5,%zmm5
vpshufb %zmm8,%zmm6,%zmm6
vaesenc %zmm15,%zmm0,%zmm0
vaesenc %zmm15,%zmm1,%zmm1
vaesenc %zmm15,%zmm2,%zmm2
vaesenc %zmm15,%zmm3,%zmm3
vpshufb %zmm8,%zmm7,%zmm7
vpclmulqdq $0x00,%zmm27,%zmm4,%zmm10
vpclmulqdq $0x00,%zmm28,%zmm5,%zmm24
vpclmulqdq $0x00,%zmm29,%zmm6,%zmm25
vaesenc %zmm16,%zmm0,%zmm0
vaesenc %zmm16,%zmm1,%zmm1
vaesenc %zmm16,%zmm2,%zmm2
vaesenc %zmm16,%zmm3,%zmm3
vpxord %zmm24,%zmm10,%zmm10
vpclmulqdq $0x00,%zmm30,%zmm7,%zmm26
vpternlogd $0x96,%zmm26,%zmm25,%zmm10
vpclmulqdq $0x01,%zmm27,%zmm4,%zmm24
vaesenc %zmm17,%zmm0,%zmm0
vaesenc %zmm17,%zmm1,%zmm1
vaesenc %zmm17,%zmm2,%zmm2
vaesenc %zmm17,%zmm3,%zmm3
vpclmulqdq $0x01,%zmm28,%zmm5,%zmm25
vpclmulqdq $0x01,%zmm29,%zmm6,%zmm26
vpternlogd $0x96,%zmm26,%zmm25,%zmm24
vpclmulqdq $0x01,%zmm30,%zmm7,%zmm25
vaesenc %zmm18,%zmm0,%zmm0
vaesenc %zmm18,%zmm1,%zmm1
vaesenc %zmm18,%zmm2,%zmm2
vaesenc %zmm18,%zmm3,%zmm3
vpclmulqdq $0x10,%zmm27,%zmm4,%zmm26
vpternlogd $0x96,%zmm26,%zmm25,%zmm24
vpclmulqdq $0x10,%zmm28,%zmm5,%zmm25
vpclmulqdq $0x10,%zmm29,%zmm6,%zmm26
vaesenc %zmm19,%zmm0,%zmm0
vaesenc %zmm19,%zmm1,%zmm1
vaesenc %zmm19,%zmm2,%zmm2
vaesenc %zmm19,%zmm3,%zmm3
vpternlogd $0x96,%zmm26,%zmm25,%zmm24
vpclmulqdq $0x01,%zmm10,%zmm31,%zmm26
vpclmulqdq $0x10,%zmm30,%zmm7,%zmm25
vpxord %zmm25,%zmm24,%zmm24
vaesenc %zmm20,%zmm0,%zmm0
vaesenc %zmm20,%zmm1,%zmm1
vaesenc %zmm20,%zmm2,%zmm2
vaesenc %zmm20,%zmm3,%zmm3
vpshufd $0x4e,%zmm10,%zmm10
vpclmulqdq $0x11,%zmm27,%zmm4,%zmm4
vpclmulqdq $0x11,%zmm28,%zmm5,%zmm5
vpclmulqdq $0x11,%zmm29,%zmm6,%zmm6
vaesenc %zmm21,%zmm0,%zmm0
vaesenc %zmm21,%zmm1,%zmm1
vaesenc %zmm21,%zmm2,%zmm2
vaesenc %zmm21,%zmm3,%zmm3
vpternlogd $0x96,%zmm26,%zmm10,%zmm24
vpclmulqdq $0x11,%zmm30,%zmm7,%zmm7
vpternlogd $0x96,%zmm6,%zmm5,%zmm4
vpclmulqdq $0x01,%zmm24,%zmm31,%zmm25
vaesenc %zmm22,%zmm0,%zmm0
vaesenc %zmm22,%zmm1,%zmm1
vaesenc %zmm22,%zmm2,%zmm2
vaesenc %zmm22,%zmm3,%zmm3
vpxord %zmm7,%zmm4,%zmm10
vpshufd $0x4e,%zmm24,%zmm24
vpternlogd $0x96,%zmm25,%zmm24,%zmm10
vaesenc %zmm23,%zmm0,%zmm0
vaesenc %zmm23,%zmm1,%zmm1
vaesenc %zmm23,%zmm2,%zmm2
vaesenc %zmm23,%zmm3,%zmm3
vextracti32x4 $1,%zmm10,%xmm4
vextracti32x4 $2,%zmm10,%xmm5
vextracti32x4 $3,%zmm10,%xmm6
vpxord %xmm4,%xmm10,%xmm10
vpternlogd $0x96,%xmm5,%xmm6,%xmm10
vpxord 0(%rdi),%zmm14,%zmm4
vpxord 64(%rdi),%zmm14,%zmm5
vpxord 128(%rdi),%zmm14,%zmm6
vpxord 192(%rdi),%zmm14,%zmm7
vaesenclast %zmm4,%zmm0,%zmm4
vaesenclast %zmm5,%zmm1,%zmm5
vaesenclast %zmm6,%zmm2,%zmm6
vaesenclast %zmm7,%zmm3,%zmm7
vmovdqu8 %zmm4,0(%rsi)
vmovdqu8 %zmm5,64(%rsi)
vmovdqu8 %zmm6,128(%rsi)
vmovdqu8 %zmm7,192(%rsi)
subq $-256,%rdi
subq $-256,%rsi
addq $-256,%rdx
cmpq $256-1,%rdx
ja L$crypt_loop_4x__func3
L$ghash_last_ciphertext_4x__func3:
vpshufb %zmm8,%zmm4,%zmm4
vpxord %zmm10,%zmm4,%zmm4
vpshufb %zmm8,%zmm5,%zmm5
vpshufb %zmm8,%zmm6,%zmm6
vpshufb %zmm8,%zmm7,%zmm7
vpclmulqdq $0x00,%zmm27,%zmm4,%zmm10
vpclmulqdq $0x00,%zmm28,%zmm5,%zmm24
vpclmulqdq $0x00,%zmm29,%zmm6,%zmm25
vpxord %zmm24,%zmm10,%zmm10
vpclmulqdq $0x00,%zmm30,%zmm7,%zmm26
vpternlogd $0x96,%zmm26,%zmm25,%zmm10
vpclmulqdq $0x01,%zmm27,%zmm4,%zmm24
vpclmulqdq $0x01,%zmm28,%zmm5,%zmm25
vpclmulqdq $0x01,%zmm29,%zmm6,%zmm26
vpternlogd $0x96,%zmm26,%zmm25,%zmm24
vpclmulqdq $0x01,%zmm30,%zmm7,%zmm25
vpclmulqdq $0x10,%zmm27,%zmm4,%zmm26
vpternlogd $0x96,%zmm26,%zmm25,%zmm24
vpclmulqdq $0x10,%zmm28,%zmm5,%zmm25
vpclmulqdq $0x10,%zmm29,%zmm6,%zmm26
vpternlogd $0x96,%zmm26,%zmm25,%zmm24
vpclmulqdq $0x01,%zmm10,%zmm31,%zmm26
vpclmulqdq $0x10,%zmm30,%zmm7,%zmm25
vpxord %zmm25,%zmm24,%zmm24
vpshufd $0x4e,%zmm10,%zmm10
vpclmulqdq $0x11,%zmm27,%zmm4,%zmm4
vpclmulqdq $0x11,%zmm28,%zmm5,%zmm5
vpclmulqdq $0x11,%zmm29,%zmm6,%zmm6
vpternlogd $0x96,%zmm26,%zmm10,%zmm24
vpclmulqdq $0x11,%zmm30,%zmm7,%zmm7
vpternlogd $0x96,%zmm6,%zmm5,%zmm4
vpclmulqdq $0x01,%zmm24,%zmm31,%zmm25
vpxord %zmm7,%zmm4,%zmm10
vpshufd $0x4e,%zmm24,%zmm24
vpternlogd $0x96,%zmm25,%zmm24,%zmm10
vextracti32x4 $1,%zmm10,%xmm4
vextracti32x4 $2,%zmm10,%xmm5
vextracti32x4 $3,%zmm10,%xmm6
vpxord %xmm4,%xmm10,%xmm10
vpternlogd $0x96,%xmm5,%xmm6,%xmm10
L$crypt_loop_4x_done__func3:
testq %rdx,%rdx
jz L$done__func3
movq %rdx,%rax
negq %rax
andq $-16,%rax
leaq 256(%r9,%rax,1),%r8
vpxor %xmm4,%xmm4,%xmm4
vpxor %xmm5,%xmm5,%xmm5
vpxor %xmm6,%xmm6,%xmm6
cmpq $64,%rdx
jb L$partial_vec__func3
L$crypt_loop_1x__func3:
vpshufb %zmm8,%zmm12,%zmm0
vpaddd %zmm11,%zmm12,%zmm12
vpxord %zmm13,%zmm0,%zmm0
leaq 16(%rcx),%rax
L$vaesenc_loop_tail_full_vec__func3:
vbroadcasti32x4 (%rax),%zmm9
vaesenc %zmm9,%zmm0,%zmm0
addq $16,%rax
cmpq %rax,%r11
jne L$vaesenc_loop_tail_full_vec__func3
vaesenclast %zmm14,%zmm0,%zmm0
vmovdqu8 (%rdi),%zmm1
vpxord %zmm1,%zmm0,%zmm0
vmovdqu8 %zmm0,(%rsi)
vmovdqu8 (%r8),%zmm30
vpshufb %zmm8,%zmm0,%zmm0
vpxord %zmm10,%zmm0,%zmm0
vpclmulqdq $0x00,%zmm30,%zmm0,%zmm7
vpclmulqdq $0x01,%zmm30,%zmm0,%zmm1
vpclmulqdq $0x10,%zmm30,%zmm0,%zmm2
vpclmulqdq $0x11,%zmm30,%zmm0,%zmm3
vpxord %zmm7,%zmm4,%zmm4
vpternlogd $0x96,%zmm2,%zmm1,%zmm5
vpxord %zmm3,%zmm6,%zmm6
vpxor %xmm10,%xmm10,%xmm10
addq $64,%r8
addq $64,%rdi
addq $64,%rsi
subq $64,%rdx
cmpq $64,%rdx
jae L$crypt_loop_1x__func3
testq %rdx,%rdx
jz L$reduce__func3
L$partial_vec__func3:
movq $-1,%rax
bzhiq %rdx,%rax,%rax
kmovq %rax,%k1
addq $15,%rdx
andq $-16,%rdx
movq $-1,%rax
bzhiq %rdx,%rax,%rax
kmovq %rax,%k2
vpshufb %zmm8,%zmm12,%zmm0
vpxord %zmm13,%zmm0,%zmm0
leaq 16(%rcx),%rax
L$vaesenc_loop_tail_partialvec__func3:
vbroadcasti32x4 (%rax),%zmm9
vaesenc %zmm9,%zmm0,%zmm0
addq $16,%rax
cmpq %rax,%r11
jne L$vaesenc_loop_tail_partialvec__func3
vaesenclast %zmm14,%zmm0,%zmm0
vmovdqu8 (%rdi),%zmm1{%k1}{z}
vpxord %zmm1,%zmm0,%zmm0
vmovdqu8 %zmm0,(%rsi){%k1}
vmovdqu8 (%r8),%zmm30{%k2}{z}
vmovdqu8 %zmm0,%zmm1{%k1}{z}
vpshufb %zmm8,%zmm1,%zmm0
vpxord %zmm10,%zmm0,%zmm0
vpclmulqdq $0x00,%zmm30,%zmm0,%zmm7
vpclmulqdq $0x01,%zmm30,%zmm0,%zmm1
vpclmulqdq $0x10,%zmm30,%zmm0,%zmm2
vpclmulqdq $0x11,%zmm30,%zmm0,%zmm3
vpxord %zmm7,%zmm4,%zmm4
vpternlogd $0x96,%zmm2,%zmm1,%zmm5
vpxord %zmm3,%zmm6,%zmm6
L$reduce__func3:
vpclmulqdq $0x01,%zmm4,%zmm31,%zmm0
vpshufd $0x4e,%zmm4,%zmm4
vpternlogd $0x96,%zmm0,%zmm4,%zmm5
vpclmulqdq $0x01,%zmm5,%zmm31,%zmm0
vpshufd $0x4e,%zmm5,%zmm5
vpternlogd $0x96,%zmm0,%zmm5,%zmm6
vextracti32x4 $1,%zmm6,%xmm0
vextracti32x4 $2,%zmm6,%xmm1
vextracti32x4 $3,%zmm6,%xmm2
vpxord %xmm0,%xmm6,%xmm10
vpternlogd $0x96,%xmm1,%xmm2,%xmm10
L$done__func3:
vpshufb %xmm8,%xmm10,%xmm10
vmovdqu %xmm10,(%r12)
vzeroupper
popq %r12
ret
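// aes_gcm_dec_update_vaes_avx10_512: the decryption counterpart of the
// function above; GHASH is computed over the input ciphertext.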
.globl _aes_gcm_dec_update_vaes_avx10_512
.private_extern _aes_gcm_dec_update_vaes_avx10_512
.p2align 5
_aes_gcm_dec_update_vaes_avx10_512:
_CET_ENDBR
pushq %r12
movq 16(%rsp),%r12
vbroadcasti32x4 L$bswap_mask(%rip),%zmm8
vbroadcasti32x4 L$gfpoly(%rip),%zmm31
vmovdqu (%r12),%xmm10
vpshufb %xmm8,%xmm10,%xmm10
vbroadcasti32x4 (%r8),%zmm12
vpshufb %zmm8,%zmm12,%zmm12
movl 240(%rcx),%r10d
leal -20(,%r10,4),%r10d
leaq 96(%rcx,%r10,4),%r11
vbroadcasti32x4 (%rcx),%zmm13
vbroadcasti32x4 (%r11),%zmm14
vpaddd L$ctr_pattern(%rip),%zmm12,%zmm12
vbroadcasti32x4 L$inc_4blocks(%rip),%zmm11
cmpq $256-1,%rdx
jbe L$crypt_loop_4x_done__func4
vmovdqu8 256-256(%r9),%zmm27
vmovdqu8 256-192(%r9),%zmm28
vmovdqu8 256-128(%r9),%zmm29
vmovdqu8 256-64(%r9),%zmm30
vbroadcasti32x4 -144(%r11),%zmm15
vbroadcasti32x4 -128(%r11),%zmm16
vbroadcasti32x4 -112(%r11),%zmm17
vbroadcasti32x4 -96(%r11),%zmm18
vbroadcasti32x4 -80(%r11),%zmm19
vbroadcasti32x4 -64(%r11),%zmm20
vbroadcasti32x4 -48(%r11),%zmm21
vbroadcasti32x4 -32(%r11),%zmm22
vbroadcasti32x4 -16(%r11),%zmm23
L$crypt_loop_4x__func4:
vmovdqu8 0(%rdi),%zmm4
vmovdqu8 64(%rdi),%zmm5
vmovdqu8 128(%rdi),%zmm6
vmovdqu8 192(%rdi),%zmm7
vpshufb %zmm8,%zmm12,%zmm0
vpaddd %zmm11,%zmm12,%zmm12
vpshufb %zmm8,%zmm12,%zmm1
vpaddd %zmm11,%zmm12,%zmm12
vpshufb %zmm8,%zmm12,%zmm2
vpaddd %zmm11,%zmm12,%zmm12
vpshufb %zmm8,%zmm12,%zmm3
vpaddd %zmm11,%zmm12,%zmm12
vpxord %zmm13,%zmm0,%zmm0
vpxord %zmm13,%zmm1,%zmm1
vpxord %zmm13,%zmm2,%zmm2
vpxord %zmm13,%zmm3,%zmm3
cmpl $24,%r10d
jl L$aes128__func4
je L$aes192__func4
vbroadcasti32x4 -208(%r11),%zmm9
vaesenc %zmm9,%zmm0,%zmm0
vaesenc %zmm9,%zmm1,%zmm1
vaesenc %zmm9,%zmm2,%zmm2
vaesenc %zmm9,%zmm3,%zmm3
vbroadcasti32x4 -192(%r11),%zmm9
vaesenc %zmm9,%zmm0,%zmm0
vaesenc %zmm9,%zmm1,%zmm1
vaesenc %zmm9,%zmm2,%zmm2
vaesenc %zmm9,%zmm3,%zmm3
L$aes192__func4:
vbroadcasti32x4 -176(%r11),%zmm9
vaesenc %zmm9,%zmm0,%zmm0
vaesenc %zmm9,%zmm1,%zmm1
vaesenc %zmm9,%zmm2,%zmm2
vaesenc %zmm9,%zmm3,%zmm3
vbroadcasti32x4 -160(%r11),%zmm9
vaesenc %zmm9,%zmm0,%zmm0
vaesenc %zmm9,%zmm1,%zmm1
vaesenc %zmm9,%zmm2,%zmm2
vaesenc %zmm9,%zmm3,%zmm3
L$aes128__func4:
vpshufb %zmm8,%zmm4,%zmm4
vpxord %zmm10,%zmm4,%zmm4
vpshufb %zmm8,%zmm5,%zmm5
vpshufb %zmm8,%zmm6,%zmm6
vaesenc %zmm15,%zmm0,%zmm0
vaesenc %zmm15,%zmm1,%zmm1
vaesenc %zmm15,%zmm2,%zmm2
vaesenc %zmm15,%zmm3,%zmm3
vpshufb %zmm8,%zmm7,%zmm7
vpclmulqdq $0x00,%zmm27,%zmm4,%zmm10
vpclmulqdq $0x00,%zmm28,%zmm5,%zmm24
vpclmulqdq $0x00,%zmm29,%zmm6,%zmm25
vaesenc %zmm16,%zmm0,%zmm0
vaesenc %zmm16,%zmm1,%zmm1
vaesenc %zmm16,%zmm2,%zmm2
vaesenc %zmm16,%zmm3,%zmm3
vpxord %zmm24,%zmm10,%zmm10
vpclmulqdq $0x00,%zmm30,%zmm7,%zmm26
vpternlogd $0x96,%zmm26,%zmm25,%zmm10
vpclmulqdq $0x01,%zmm27,%zmm4,%zmm24
vaesenc %zmm17,%zmm0,%zmm0
vaesenc %zmm17,%zmm1,%zmm1
vaesenc %zmm17,%zmm2,%zmm2
vaesenc %zmm17,%zmm3,%zmm3
vpclmulqdq $0x01,%zmm28,%zmm5,%zmm25
vpclmulqdq $0x01,%zmm29,%zmm6,%zmm26
vpternlogd $0x96,%zmm26,%zmm25,%zmm24
vpclmulqdq $0x01,%zmm30,%zmm7,%zmm25
vaesenc %zmm18,%zmm0,%zmm0
vaesenc %zmm18,%zmm1,%zmm1
vaesenc %zmm18,%zmm2,%zmm2
vaesenc %zmm18,%zmm3,%zmm3
vpclmulqdq $0x10,%zmm27,%zmm4,%zmm26
vpternlogd $0x96,%zmm26,%zmm25,%zmm24
vpclmulqdq $0x10,%zmm28,%zmm5,%zmm25
vpclmulqdq $0x10,%zmm29,%zmm6,%zmm26
vaesenc %zmm19,%zmm0,%zmm0
vaesenc %zmm19,%zmm1,%zmm1
vaesenc %zmm19,%zmm2,%zmm2
vaesenc %zmm19,%zmm3,%zmm3
vpternlogd $0x96,%zmm26,%zmm25,%zmm24
vpclmulqdq $0x01,%zmm10,%zmm31,%zmm26
vpclmulqdq $0x10,%zmm30,%zmm7,%zmm25
vpxord %zmm25,%zmm24,%zmm24
vaesenc %zmm20,%zmm0,%zmm0
vaesenc %zmm20,%zmm1,%zmm1
vaesenc %zmm20,%zmm2,%zmm2
vaesenc %zmm20,%zmm3,%zmm3
vpshufd $0x4e,%zmm10,%zmm10
vpclmulqdq $0x11,%zmm27,%zmm4,%zmm4
vpclmulqdq $0x11,%zmm28,%zmm5,%zmm5
vpclmulqdq $0x11,%zmm29,%zmm6,%zmm6
vaesenc %zmm21,%zmm0,%zmm0
vaesenc %zmm21,%zmm1,%zmm1
vaesenc %zmm21,%zmm2,%zmm2
vaesenc %zmm21,%zmm3,%zmm3
vpternlogd $0x96,%zmm26,%zmm10,%zmm24
vpclmulqdq $0x11,%zmm30,%zmm7,%zmm7
vpternlogd $0x96,%zmm6,%zmm5,%zmm4
vpclmulqdq $0x01,%zmm24,%zmm31,%zmm25
vaesenc %zmm22,%zmm0,%zmm0
vaesenc %zmm22,%zmm1,%zmm1
vaesenc %zmm22,%zmm2,%zmm2
vaesenc %zmm22,%zmm3,%zmm3
vpxord %zmm7,%zmm4,%zmm10
vpshufd $0x4e,%zmm24,%zmm24
vpternlogd $0x96,%zmm25,%zmm24,%zmm10
vaesenc %zmm23,%zmm0,%zmm0
vaesenc %zmm23,%zmm1,%zmm1
vaesenc %zmm23,%zmm2,%zmm2
vaesenc %zmm23,%zmm3,%zmm3
vextracti32x4 $1,%zmm10,%xmm4
vextracti32x4 $2,%zmm10,%xmm5
vextracti32x4 $3,%zmm10,%xmm6
vpxord %xmm4,%xmm10,%xmm10
vpternlogd $0x96,%xmm5,%xmm6,%xmm10
vpxord 0(%rdi),%zmm14,%zmm4
vpxord 64(%rdi),%zmm14,%zmm5
vpxord 128(%rdi),%zmm14,%zmm6
vpxord 192(%rdi),%zmm14,%zmm7
vaesenclast %zmm4,%zmm0,%zmm4
vaesenclast %zmm5,%zmm1,%zmm5
vaesenclast %zmm6,%zmm2,%zmm6
vaesenclast %zmm7,%zmm3,%zmm7
vmovdqu8 %zmm4,0(%rsi)
vmovdqu8 %zmm5,64(%rsi)
vmovdqu8 %zmm6,128(%rsi)
vmovdqu8 %zmm7,192(%rsi)
subq $-256,%rdi
subq $-256,%rsi
addq $-256,%rdx
cmpq $256-1,%rdx
ja L$crypt_loop_4x__func4
L$crypt_loop_4x_done__func4:
testq %rdx,%rdx
jz L$done__func4
movq %rdx,%rax
negq %rax
andq $-16,%rax
leaq 256(%r9,%rax,1),%r8
vpxor %xmm4,%xmm4,%xmm4
vpxor %xmm5,%xmm5,%xmm5
vpxor %xmm6,%xmm6,%xmm6
cmpq $64,%rdx
jb L$partial_vec__func4
L$crypt_loop_1x__func4:
vpshufb %zmm8,%zmm12,%zmm0
vpaddd %zmm11,%zmm12,%zmm12
vpxord %zmm13,%zmm0,%zmm0
leaq 16(%rcx),%rax
L$vaesenc_loop_tail_full_vec__func4:
vbroadcasti32x4 (%rax),%zmm9
vaesenc %zmm9,%zmm0,%zmm0
addq $16,%rax
cmpq %rax,%r11
jne L$vaesenc_loop_tail_full_vec__func4
vaesenclast %zmm14,%zmm0,%zmm0
vmovdqu8 (%rdi),%zmm1
vpxord %zmm1,%zmm0,%zmm0
vmovdqu8 %zmm0,(%rsi)
vmovdqu8 (%r8),%zmm30
vpshufb %zmm8,%zmm1,%zmm0
vpxord %zmm10,%zmm0,%zmm0
vpclmulqdq $0x00,%zmm30,%zmm0,%zmm7
vpclmulqdq $0x01,%zmm30,%zmm0,%zmm1
vpclmulqdq $0x10,%zmm30,%zmm0,%zmm2
vpclmulqdq $0x11,%zmm30,%zmm0,%zmm3
vpxord %zmm7,%zmm4,%zmm4
vpternlogd $0x96,%zmm2,%zmm1,%zmm5
vpxord %zmm3,%zmm6,%zmm6
vpxor %xmm10,%xmm10,%xmm10
addq $64,%r8
addq $64,%rdi
addq $64,%rsi
subq $64,%rdx
cmpq $64,%rdx
jae L$crypt_loop_1x__func4
testq %rdx,%rdx
jz L$reduce__func4
L$partial_vec__func4:
movq $-1,%rax
bzhiq %rdx,%rax,%rax
kmovq %rax,%k1
addq $15,%rdx
andq $-16,%rdx
movq $-1,%rax
bzhiq %rdx,%rax,%rax
kmovq %rax,%k2
vpshufb %zmm8,%zmm12,%zmm0
vpxord %zmm13,%zmm0,%zmm0
leaq 16(%rcx),%rax
L$vaesenc_loop_tail_partialvec__func4:
vbroadcasti32x4 (%rax),%zmm9
vaesenc %zmm9,%zmm0,%zmm0
addq $16,%rax
cmpq %rax,%r11
jne L$vaesenc_loop_tail_partialvec__func4
vaesenclast %zmm14,%zmm0,%zmm0
vmovdqu8 (%rdi),%zmm1{%k1}{z}
vpxord %zmm1,%zmm0,%zmm0
vmovdqu8 %zmm0,(%rsi){%k1}
vmovdqu8 (%r8),%zmm30{%k2}{z}
vpshufb %zmm8,%zmm1,%zmm0
vpxord %zmm10,%zmm0,%zmm0
vpclmulqdq $0x00,%zmm30,%zmm0,%zmm7
vpclmulqdq $0x01,%zmm30,%zmm0,%zmm1
vpclmulqdq $0x10,%zmm30,%zmm0,%zmm2
vpclmulqdq $0x11,%zmm30,%zmm0,%zmm3
vpxord %zmm7,%zmm4,%zmm4
vpternlogd $0x96,%zmm2,%zmm1,%zmm5
vpxord %zmm3,%zmm6,%zmm6
L$reduce__func4:
vpclmulqdq $0x01,%zmm4,%zmm31,%zmm0
vpshufd $0x4e,%zmm4,%zmm4
vpternlogd $0x96,%zmm0,%zmm4,%zmm5
vpclmulqdq $0x01,%zmm5,%zmm31,%zmm0
vpshufd $0x4e,%zmm5,%zmm5
vpternlogd $0x96,%zmm0,%zmm5,%zmm6
vextracti32x4 $1,%zmm6,%xmm0
vextracti32x4 $2,%zmm6,%xmm1
vextracti32x4 $3,%zmm6,%xmm2
vpxord %xmm0,%xmm6,%xmm10
vpternlogd $0x96,%xmm1,%xmm2,%xmm10
L$done__func4:
vpshufb %xmm8,%xmm10,%xmm10
vmovdqu %xmm10,(%r12)
vzeroupper
popq %r12
ret
#endif