; blob: 095689cf33199b45dd8e4481c312504058423c90 [file] [log] [blame]
; This file is generated from a similarly-named Perl script in the BoringSSL
; source tree. Do not edit by hand.
; The code that follows is conditional on NASM's output format being win64.
%ifidn __OUTPUT_FORMAT__, win64
; Make memory references RIP-relative by default.
default rel
; The operand-size keywords used on memory operands throughout this file are
; defined as empty macros, so e.g. `XMMWORD[rsi]` assembles as plain `[rsi]`.
%define XMMWORD
%define YMMWORD
%define ZMMWORD
; _CET_ENDBR is likewise defined empty here, so its uses expand to nothing.
%define _CET_ENDBR
; Pulled in only when BORINGSSL_PREFIX is defined.
; NOTE(review): presumably renames exported symbols with a build-specific
; prefix -- confirm against the include file, which is not part of this file.
%ifdef BORINGSSL_PREFIX
%include "boringssl_prefix_symbols_nasm.inc"
%endif
section .text code align=64
; Defined outside this file. chacha20_poly1305_open reads the dword at
; OPENSSL_ia32cap_P+8 and tests bits 5 and 8 (mask 288) to pick a code path.
EXTERN OPENSSL_ia32cap_P
; This label is emitted while the .text section opened above is still current;
; the tables themselves are placed in .rdata by the section switch below.
chacha20_poly1305_constants:
section .rdata rdata align=8
ALIGN 64
; ASCII "expand 32-byte k" (16 bytes), stored twice. chacha20_poly1305_open
; loads the first copy into xmm0 as the first of its four 16-byte state rows
; and adds it back after the rounds.
; NOTE(review): the second copy is presumably there for 32-byte (AVX2) loads --
; confirm against the AVX2 code, which is outside this excerpt.
$L$chacha20_consts:
DB 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
DB 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
; pshufb control mask: within every 32-bit lane the destination bytes are taken
; from source bytes 3,0,1,2, i.e. each dword is rotated left by 8 bits.
; Stored twice (32 bytes total).
$L$rol8:
DB 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
DB 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
; pshufb control mask: within every 32-bit lane the destination bytes are taken
; from source bytes 2,3,0,1, i.e. each dword is rotated left by 16 bits.
; Stored twice (32 bytes total).
$L$rol16:
DB 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
DB 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
; Four zero dwords, immediately followed in memory by $L$sse_inc.
; NOTE(review): not referenced in the visible code; presumably a 32-byte load
; from here yields per-lane counter offsets {0, 1} for the AVX2 path -- confirm.
$L$avx2_init:
DD 0,0,0,0
; Added with paddd to the fourth state row to bump its lowest dword by 1
; (the other three dwords are left unchanged).
$L$sse_inc:
DD 1,0,0,0
; NOTE(review): not referenced in the visible code; by its shape it adds 2 to
; the lowest dword of each 16-byte half of a 32-byte vector -- confirm usage.
$L$avx2_inc:
DD 2,0,0,0,2,0,0,0
; The first 16 bytes are ANDed (pand) into xmm0 by chacha20_poly1305_open
; before it is stored at [rbp+160], where the hashing code reads its
; multiplier from. The second 16 bytes are all ones.
; NOTE(review): the all-ones half presumably lets a 32-byte AND leave the upper
; 16 bytes untouched -- confirm against the AVX2 code.
$L$clamp:
DQ 0x0FFFFFFC0FFFFFFF,0x0FFFFFFC0FFFFFFC
DQ 0xFFFFFFFFFFFFFFFF,0xFFFFFFFFFFFFFFFF
ALIGN 16
; Sixteen 16-byte rows; row k (counting from 0) has its first k+1 bytes set to
; 0xff and the remaining bytes zero.
; NOTE(review): not referenced in the visible code; presumably used to keep
; only the low k+1 bytes of a partial 16-byte block -- confirm.
$L$and_masks:
DB 0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
DB 0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
DB 0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
DB 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
DB 0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
DB 0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00
DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00
DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00
DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00
DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00
DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00
DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00
DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
section .text
ALIGN 64
;-----------------------------------------------------------------------
; poly_hash_ad_internal
; Internal helper with a private register convention (it is reached via
; `call` from chacha20_poly1305_open, not through the Win64 ABI).
;
; Starts from a zero accumulator and absorbs r8 bytes found at rcx. Each
; 16-byte block (the last one zero-padded if short) is added to the
; accumulator together with a 1 at bit 128, then the accumulator is
; multiplied by the 128-bit value at [rbp+160] and partially reduced
; modulo 2^130-5.
;
; In:    rcx = pointer to the bytes to hash
;        r8  = number of bytes
;        rbp = frame base; QWORD[rbp+160] / QWORD[rbp+168] hold the low /
;              high 64 bits of the multiplier
; Out:   r12:r11:r10 = accumulator (r10 is the least significant limb)
; Clobb: rax, rdx, r9, r13, r14, r15, flags; rcx and r8 may be modified
;-----------------------------------------------------------------------
poly_hash_ad_internal:
; accumulator = 0
xor r10,r10
xor r11,r11
xor r12,r12
; Exactly 13 bytes get a dedicated straight-line path; everything else goes
; through the generic loop.
cmp r8,13
jne NEAR $L$hash_ad_loop
$L$poly_fast_tls_ad:
; Low limb = bytes 0..7.
mov r10,QWORD[rcx]
; Load bytes 5..12 (stays inside the 13-byte input), then drop bytes 5..7 so
; the mid limb holds bytes 8..12 zero-extended.
mov r11,QWORD[5+rcx]
shr r11,24
; High limb = 1: the bit at position 128 that accompanies every block.
mov r12,1
; --- accumulator *= multiplier; the 4-limb product is built in
;     r13 (lowest), r14, r15, r9 (highest) ---
; Low multiplier word times the three accumulator limbs.
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
; Only the low 64 bits of (low multiplier word * high limb) are kept.
imul r15,r12
add r14,rax
adc r15,rdx
; High multiplier word times the three accumulator limbs.
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
; r10 is free now; reuse it to carry the high half of this partial product.
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
imul r9,r12
add r15,r10
adc r9,rdx
; --- partial reduction mod 2^130-5 ---
; The low 130 bits of the product (r13, r14, r15 & 3) become the new
; accumulator. The bits above position 130, call them H, are folded back in
; as 5*H = 4*H + H: r9:(r15 & -4) is 4*H, and the shrd/shr pair shifts
; r9:r15 right by 2 to give H.
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
; r9:r15 = 4*H + H
add r15,r13
adc r9,r14
; accumulator += 5*H, carrying into the high limb
add r10,r15
adc r11,r9
adc r12,0
ret
; Generic path: consume whole 16-byte blocks while at least 16 bytes remain.
$L$hash_ad_loop:
cmp r8,16
jb NEAR $L$hash_ad_tail
; accumulator += 16-byte block at rcx, plus 1 at bit 128 (adc also takes the
; carry out of the two lower limbs).
add r10,QWORD[((0+0))+rcx]
adc r11,QWORD[((8+0))+rcx]
adc r12,1
; Same multiply sequence as in the 13-byte path: product in r13,r14,r15,r9.
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
imul r9,r12
add r15,r10
adc r9,rdx
; Same partial reduction: keep the low 130 bits, add 5 * (bits above 130).
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
; Advance to the next block.
lea rcx,[16+rcx]
sub r8,16
jmp NEAR $L$hash_ad_loop
; Fewer than 16 bytes remain here (r8 in 0..15).
$L$hash_ad_tail:
cmp r8,0
je NEAR $L$hash_ad_done
; Assemble the remaining bytes into r14:r13 as a little-endian value, zero
; padded at the top. r15 is a scratch register for the byte being inserted.
xor r13,r13
xor r14,r14
xor r15,r15
; Point one past the last byte and walk backwards.
add rcx,r8
$L$hash_ad_tail_loop:
; Shift the 128-bit value r14:r13 left by one byte, then insert the next
; (lower-addressed) byte at the bottom.
shld r14,r13,8
shl r13,8
movzx r15,BYTE[((-1))+rcx]
xor r13,r15
dec rcx
; The flags from this dec decide the loop.
dec r8
jne NEAR $L$hash_ad_tail_loop
; accumulator += padded tail block, plus 1 at bit 128.
add r10,r13
adc r11,r14
adc r12,1
; Same multiply sequence as above: product in r13,r14,r15,r9.
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
imul r9,r12
add r15,r10
adc r9,rdx
; Same partial reduction: keep the low 130 bits, add 5 * (bits above 130).
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
; Result is left in r12:r11:r10.
$L$hash_ad_done:
ret
global chacha20_poly1305_open
ALIGN 64
chacha20_poly1305_open:
mov QWORD[8+rsp],rdi ;WIN64 prologue
mov QWORD[16+rsp],rsi
mov rax,rsp
$L$SEH_begin_chacha20_poly1305_open:
mov rdi,rcx
mov rsi,rdx
mov rdx,r8
mov rcx,r9
mov r8,QWORD[40+rsp]
mov r9,QWORD[48+rsp]
_CET_ENDBR
push rbp
push rbx
push r12
push r13
push r14
push r15
push r9
sub rsp,288 + 160 + 32
lea rbp,[32+rsp]
and rbp,-32
movaps XMMWORD[(0+0)+rbp],xmm6
movaps XMMWORD[(16+0)+rbp],xmm7
movaps XMMWORD[(32+0)+rbp],xmm8
movaps XMMWORD[(48+0)+rbp],xmm9
movaps XMMWORD[(64+0)+rbp],xmm10
movaps XMMWORD[(80+0)+rbp],xmm11
movaps XMMWORD[(96+0)+rbp],xmm12
movaps XMMWORD[(112+0)+rbp],xmm13
movaps XMMWORD[(128+0)+rbp],xmm14
movaps XMMWORD[(144+0)+rbp],xmm15
mov rbx,rdx
mov QWORD[((0+160+32))+rbp],r8
mov QWORD[((8+160+32))+rbp],rbx
mov eax,DWORD[((OPENSSL_ia32cap_P+8))]
and eax,288
xor eax,288
jz NEAR chacha20_poly1305_open_avx2
cmp rbx,128
jbe NEAR $L$open_sse_128
movdqa xmm0,XMMWORD[$L$chacha20_consts]
movdqu xmm4,XMMWORD[r9]
movdqu xmm8,XMMWORD[16+r9]
movdqu xmm12,XMMWORD[32+r9]
movdqa xmm7,xmm12
movdqa XMMWORD[(160+48)+rbp],xmm4
movdqa XMMWORD[(160+64)+rbp],xmm8
movdqa XMMWORD[(160+96)+rbp],xmm12
mov r10,10
$L$open_sse_init_rounds:
paddd xmm0,xmm4
pxor xmm12,xmm0
pshufb xmm12,XMMWORD[$L$rol16]
paddd xmm8,xmm12
pxor xmm4,xmm8
movdqa xmm3,xmm4
pslld xmm3,12
psrld xmm4,20
pxor xmm4,xmm3
paddd xmm0,xmm4
pxor xmm12,xmm0
pshufb xmm12,XMMWORD[$L$rol8]
paddd xmm8,xmm12
pxor xmm4,xmm8
movdqa xmm3,xmm4
pslld xmm3,7
psrld xmm4,25
pxor xmm4,xmm3
DB 102,15,58,15,228,4
DB 102,69,15,58,15,192,8
DB 102,69,15,58,15,228,12
paddd xmm0,xmm4
pxor xmm12,xmm0
pshufb xmm12,XMMWORD[$L$rol16]
paddd xmm8,xmm12
pxor xmm4,xmm8
movdqa xmm3,xmm4
pslld xmm3,12
psrld xmm4,20
pxor xmm4,xmm3
paddd xmm0,xmm4
pxor xmm12,xmm0
pshufb xmm12,XMMWORD[$L$rol8]
paddd xmm8,xmm12
pxor xmm4,xmm8
movdqa xmm3,xmm4
pslld xmm3,7
psrld xmm4,25
pxor xmm4,xmm3
DB 102,15,58,15,228,12
DB 102,69,15,58,15,192,8
DB 102,69,15,58,15,228,4
dec r10
jne NEAR $L$open_sse_init_rounds
paddd xmm0,XMMWORD[$L$chacha20_consts]
paddd xmm4,XMMWORD[((160+48))+rbp]
pand xmm0,XMMWORD[$L$clamp]
movdqa XMMWORD[(160+0)+rbp],xmm0
movdqa XMMWORD[(160+16)+rbp],xmm4
mov r8,r8
call poly_hash_ad_internal
$L$open_sse_main_loop:
cmp rbx,16*16
jb NEAR $L$open_sse_tail
movdqa xmm0,XMMWORD[$L$chacha20_consts]
movdqa xmm4,XMMWORD[((160+48))+rbp]
movdqa xmm8,XMMWORD[((160+64))+rbp]
movdqa xmm1,xmm0
movdqa xmm5,xmm4
movdqa xmm9,xmm8
movdqa xmm2,xmm0
movdqa xmm6,xmm4
movdqa xmm10,xmm8
movdqa xmm3,xmm0
movdqa xmm7,xmm4
movdqa xmm11,xmm8
movdqa xmm15,XMMWORD[((160+96))+rbp]
paddd xmm15,XMMWORD[$L$sse_inc]
movdqa xmm14,xmm15
paddd xmm14,XMMWORD[$L$sse_inc]
movdqa xmm13,xmm14
paddd xmm13,XMMWORD[$L$sse_inc]
movdqa xmm12,xmm13
paddd xmm12,XMMWORD[$L$sse_inc]
movdqa XMMWORD[(160+96)+rbp],xmm12
movdqa XMMWORD[(160+112)+rbp],xmm13
movdqa XMMWORD[(160+128)+rbp],xmm14
movdqa XMMWORD[(160+144)+rbp],xmm15
mov rcx,4
mov r8,rsi
$L$open_sse_main_loop_rounds:
movdqa XMMWORD[(160+80)+rbp],xmm8
movdqa xmm8,XMMWORD[$L$rol16]
paddd xmm3,xmm7
paddd xmm2,xmm6
paddd xmm1,xmm5
paddd xmm0,xmm4
pxor xmm15,xmm3
pxor xmm14,xmm2
pxor xmm13,xmm1
pxor xmm12,xmm0
DB 102,69,15,56,0,248
DB 102,69,15,56,0,240
DB 102,69,15,56,0,232
DB 102,69,15,56,0,224
movdqa xmm8,XMMWORD[((160+80))+rbp]
paddd xmm11,xmm15
paddd xmm10,xmm14
paddd xmm9,xmm13
paddd xmm8,xmm12
pxor xmm7,xmm11
add r10,QWORD[((0+0))+r8]
adc r11,QWORD[((8+0))+r8]
adc r12,1
lea r8,[16+r8]
pxor xmm6,xmm10
pxor xmm5,xmm9
pxor xmm4,xmm8
movdqa XMMWORD[(160+80)+rbp],xmm8
movdqa xmm8,xmm7
psrld xmm8,20
pslld xmm7,32-20
pxor xmm7,xmm8
movdqa xmm8,xmm6
psrld xmm8,20
pslld xmm6,32-20
pxor xmm6,xmm8
movdqa xmm8,xmm5
psrld xmm8,20
pslld xmm5,32-20
pxor xmm5,xmm8
movdqa xmm8,xmm4
psrld xmm8,20
pslld xmm4,32-20
pxor xmm4,xmm8
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
movdqa xmm8,XMMWORD[$L$rol8]
paddd xmm3,xmm7
paddd xmm2,xmm6
paddd xmm1,xmm5
paddd xmm0,xmm4
pxor xmm15,xmm3
pxor xmm14,xmm2
pxor xmm13,xmm1
pxor xmm12,xmm0
DB 102,69,15,56,0,248
DB 102,69,15,56,0,240
DB 102,69,15,56,0,232
DB 102,69,15,56,0,224
movdqa xmm8,XMMWORD[((160+80))+rbp]
paddd xmm11,xmm15
paddd xmm10,xmm14
paddd xmm9,xmm13
paddd xmm8,xmm12
pxor xmm7,xmm11
pxor xmm6,xmm10
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
pxor xmm5,xmm9
pxor xmm4,xmm8
movdqa XMMWORD[(160+80)+rbp],xmm8
movdqa xmm8,xmm7
psrld xmm8,25
pslld xmm7,32-25
pxor xmm7,xmm8
movdqa xmm8,xmm6
psrld xmm8,25
pslld xmm6,32-25
pxor xmm6,xmm8
movdqa xmm8,xmm5
psrld xmm8,25
pslld xmm5,32-25
pxor xmm5,xmm8
movdqa xmm8,xmm4
psrld xmm8,25
pslld xmm4,32-25
pxor xmm4,xmm8
movdqa xmm8,XMMWORD[((160+80))+rbp]
imul r9,r12
add r15,r10
adc r9,rdx
DB 102,15,58,15,255,4
DB 102,69,15,58,15,219,8
DB 102,69,15,58,15,255,12
DB 102,15,58,15,246,4
DB 102,69,15,58,15,210,8
DB 102,69,15,58,15,246,12
DB 102,15,58,15,237,4
DB 102,69,15,58,15,201,8
DB 102,69,15,58,15,237,12
DB 102,15,58,15,228,4
DB 102,69,15,58,15,192,8
DB 102,69,15,58,15,228,12
movdqa XMMWORD[(160+80)+rbp],xmm8
movdqa xmm8,XMMWORD[$L$rol16]
paddd xmm3,xmm7
paddd xmm2,xmm6
paddd xmm1,xmm5
paddd xmm0,xmm4
pxor xmm15,xmm3
pxor xmm14,xmm2
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
pxor xmm13,xmm1
pxor xmm12,xmm0
DB 102,69,15,56,0,248
DB 102,69,15,56,0,240
DB 102,69,15,56,0,232
DB 102,69,15,56,0,224
movdqa xmm8,XMMWORD[((160+80))+rbp]
paddd xmm11,xmm15
paddd xmm10,xmm14
paddd xmm9,xmm13
paddd xmm8,xmm12
pxor xmm7,xmm11
pxor xmm6,xmm10
pxor xmm5,xmm9
pxor xmm4,xmm8
movdqa XMMWORD[(160+80)+rbp],xmm8
movdqa xmm8,xmm7
psrld xmm8,20
pslld xmm7,32-20
pxor xmm7,xmm8
movdqa xmm8,xmm6
psrld xmm8,20
pslld xmm6,32-20
pxor xmm6,xmm8
movdqa xmm8,xmm5
psrld xmm8,20
pslld xmm5,32-20
pxor xmm5,xmm8
movdqa xmm8,xmm4
psrld xmm8,20
pslld xmm4,32-20
pxor xmm4,xmm8
movdqa xmm8,XMMWORD[$L$rol8]
paddd xmm3,xmm7
paddd xmm2,xmm6
paddd xmm1,xmm5
paddd xmm0,xmm4
pxor xmm15,xmm3
pxor xmm14,xmm2
pxor xmm13,xmm1
pxor xmm12,xmm0
DB 102,69,15,56,0,248
DB 102,69,15,56,0,240
DB 102,69,15,56,0,232
DB 102,69,15,56,0,224
movdqa xmm8,XMMWORD[((160+80))+rbp]
paddd xmm11,xmm15
paddd xmm10,xmm14
paddd xmm9,xmm13
paddd xmm8,xmm12
pxor xmm7,xmm11
pxor xmm6,xmm10
pxor xmm5,xmm9
pxor xmm4,xmm8
movdqa XMMWORD[(160+80)+rbp],xmm8
movdqa xmm8,xmm7
psrld xmm8,25
pslld xmm7,32-25
pxor xmm7,xmm8
movdqa xmm8,xmm6
psrld xmm8,25
pslld xmm6,32-25
pxor xmm6,xmm8
movdqa xmm8,xmm5
psrld xmm8,25
pslld xmm5,32-25
pxor xmm5,xmm8
movdqa xmm8,xmm4
psrld xmm8,25
pslld xmm4,32-25
pxor xmm4,xmm8
movdqa xmm8,XMMWORD[((160+80))+rbp]
DB 102,15,58,15,255,12
DB 102,69,15,58,15,219,8
DB 102,69,15,58,15,255,4
DB 102,15,58,15,246,12
DB 102,69,15,58,15,210,8
DB 102,69,15,58,15,246,4
DB 102,15,58,15,237,12
DB 102,69,15,58,15,201,8
DB 102,69,15,58,15,237,4
DB 102,15,58,15,228,12
DB 102,69,15,58,15,192,8
DB 102,69,15,58,15,228,4
dec rcx
jge NEAR $L$open_sse_main_loop_rounds
add r10,QWORD[((0+0))+r8]
adc r11,QWORD[((8+0))+r8]
adc r12,1
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
imul r9,r12
add r15,r10
adc r9,rdx
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
lea r8,[16+r8]
cmp rcx,-6
jg NEAR $L$open_sse_main_loop_rounds
paddd xmm3,XMMWORD[$L$chacha20_consts]
paddd xmm7,XMMWORD[((160+48))+rbp]
paddd xmm11,XMMWORD[((160+64))+rbp]
paddd xmm15,XMMWORD[((160+144))+rbp]
paddd xmm2,XMMWORD[$L$chacha20_consts]
paddd xmm6,XMMWORD[((160+48))+rbp]
paddd xmm10,XMMWORD[((160+64))+rbp]
paddd xmm14,XMMWORD[((160+128))+rbp]
paddd xmm1,XMMWORD[$L$chacha20_consts]
paddd xmm5,XMMWORD[((160+48))+rbp]
paddd xmm9,XMMWORD[((160+64))+rbp]
paddd xmm13,XMMWORD[((160+112))+rbp]
paddd xmm0,XMMWORD[$L$chacha20_consts]
paddd xmm4,XMMWORD[((160+48))+rbp]
paddd xmm8,XMMWORD[((160+64))+rbp]
paddd xmm12,XMMWORD[((160+96))+rbp]
movdqa XMMWORD[(160+80)+rbp],xmm12
movdqu xmm12,XMMWORD[((0 + 0))+rsi]
pxor xmm12,xmm3
movdqu XMMWORD[(0 + 0)+rdi],xmm12
movdqu xmm12,XMMWORD[((16 + 0))+rsi]
pxor xmm12,xmm7
movdqu XMMWORD[(16 + 0)+rdi],xmm12
movdqu xmm12,XMMWORD[((32 + 0))+rsi]
pxor xmm12,xmm11
movdqu XMMWORD[(32 + 0)+rdi],xmm12
movdqu xmm12,XMMWORD[((48 + 0))+rsi]
pxor xmm12,xmm15
movdqu XMMWORD[(48 + 0)+rdi],xmm12
movdqu xmm3,XMMWORD[((0 + 64))+rsi]
movdqu xmm7,XMMWORD[((16 + 64))+rsi]
movdqu xmm11,XMMWORD[((32 + 64))+rsi]
movdqu xmm15,XMMWORD[((48 + 64))+rsi]
pxor xmm2,xmm3
pxor xmm6,xmm7
pxor xmm10,xmm11
pxor xmm15,xmm14
movdqu XMMWORD[(0 + 64)+rdi],xmm2
movdqu XMMWORD[(16 + 64)+rdi],xmm6
movdqu XMMWORD[(32 + 64)+rdi],xmm10
movdqu XMMWORD[(48 + 64)+rdi],xmm15
movdqu xmm3,XMMWORD[((0 + 128))+rsi]
movdqu xmm7,XMMWORD[((16 + 128))+rsi]
movdqu xmm11,XMMWORD[((32 + 128))+rsi]
movdqu xmm15,XMMWORD[((48 + 128))+rsi]
pxor xmm1,xmm3
pxor xmm5,xmm7
pxor xmm9,xmm11
pxor xmm15,xmm13
movdqu XMMWORD[(0 + 128)+rdi],xmm1
movdqu XMMWORD[(16 + 128)+rdi],xmm5
movdqu XMMWORD[(32 + 128)+rdi],xmm9
movdqu XMMWORD[(48 + 128)+rdi],xmm15
movdqu xmm3,XMMWORD[((0 + 192))+rsi]
movdqu xmm7,XMMWORD[((16 + 192))+rsi]
movdqu xmm11,XMMWORD[((32 + 192))+rsi]
movdqu xmm15,XMMWORD[((48 + 192))+rsi]
pxor xmm0,xmm3
pxor xmm4,xmm7
pxor xmm8,xmm11
pxor xmm15,XMMWORD[((160+80))+rbp]
movdqu XMMWORD[(0 + 192)+rdi],xmm0
movdqu XMMWORD[(16 + 192)+rdi],xmm4
movdqu XMMWORD[(32 + 192)+rdi],xmm8
movdqu XMMWORD[(48 + 192)+rdi],xmm15
lea rsi,[256+rsi]
lea rdi,[256+rdi]
sub rbx,16*16
jmp NEAR $L$open_sse_main_loop
$L$open_sse_tail:
test rbx,rbx
jz NEAR $L$open_sse_finalize
cmp rbx,12*16
ja NEAR $L$open_sse_tail_256
cmp rbx,8*16
ja NEAR $L$open_sse_tail_192
cmp rbx,4*16
ja NEAR $L$open_sse_tail_128
movdqa xmm0,XMMWORD[$L$chacha20_consts]
movdqa xmm4,XMMWORD[((160+48))+rbp]
movdqa xmm8,XMMWORD[((160+64))+rbp]
movdqa xmm12,XMMWORD[((160+96))+rbp]
paddd xmm12,XMMWORD[$L$sse_inc]
movdqa XMMWORD[(160+96)+rbp],xmm12
xor r8,r8
mov rcx,rbx
cmp rcx,16
jb NEAR $L$open_sse_tail_64_rounds
$L$open_sse_tail_64_rounds_and_x1hash:
add r10,QWORD[((0+0))+r8*1+rsi]
adc r11,QWORD[((8+0))+r8*1+rsi]
adc r12,1
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
imul r9,r12
add r15,r10
adc r9,rdx
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
sub rcx,16
$L$open_sse_tail_64_rounds:
add r8,16
paddd xmm0,xmm4
pxor xmm12,xmm0
pshufb xmm12,XMMWORD[$L$rol16]
paddd xmm8,xmm12
pxor xmm4,xmm8
movdqa xmm3,xmm4
pslld xmm3,12
psrld xmm4,20
pxor xmm4,xmm3
paddd xmm0,xmm4
pxor xmm12,xmm0
pshufb xmm12,XMMWORD[$L$rol8]
paddd xmm8,xmm12
pxor xmm4,xmm8
movdqa xmm3,xmm4
pslld xmm3,7
psrld xmm4,25
pxor xmm4,xmm3
DB 102,15,58,15,228,4
DB 102,69,15,58,15,192,8
DB 102,69,15,58,15,228,12
paddd xmm0,xmm4
pxor xmm12,xmm0
pshufb xmm12,XMMWORD[$L$rol16]
paddd xmm8,xmm12
pxor xmm4,xmm8
movdqa xmm3,xmm4
pslld xmm3,12
psrld xmm4,20
pxor xmm4,xmm3
paddd xmm0,xmm4
pxor xmm12,xmm0
pshufb xmm12,XMMWORD[$L$rol8]
paddd xmm8,xmm12
pxor xmm4,xmm8
movdqa xmm3,xmm4
pslld xmm3,7
psrld xmm4,25
pxor xmm4,xmm3
DB 102,15,58,15,228,12
DB 102,69,15,58,15,192,8
DB 102,69,15,58,15,228,4
cmp rcx,16
jae NEAR $L$open_sse_tail_64_rounds_and_x1hash
cmp r8,10*16
jne NEAR $L$open_sse_tail_64_rounds
paddd xmm0,XMMWORD[$L$chacha20_consts]
paddd xmm4,XMMWORD[((160+48))+rbp]
paddd xmm8,XMMWORD[((160+64))+rbp]
paddd xmm12,XMMWORD[((160+96))+rbp]
jmp NEAR $L$open_sse_tail_64_dec_loop
$L$open_sse_tail_128:
movdqa xmm0,XMMWORD[$L$chacha20_consts]
movdqa xmm4,XMMWORD[((160+48))+rbp]
movdqa xmm8,XMMWORD[((160+64))+rbp]
movdqa xmm1,xmm0
movdqa xmm5,xmm4
movdqa xmm9,xmm8
movdqa xmm13,XMMWORD[((160+96))+rbp]
paddd xmm13,XMMWORD[$L$sse_inc]
movdqa xmm12,xmm13
paddd xmm12,XMMWORD[$L$sse_inc]
movdqa XMMWORD[(160+96)+rbp],xmm12
movdqa XMMWORD[(160+112)+rbp],xmm13
mov rcx,rbx
and rcx,-16
xor r8,r8
$L$open_sse_tail_128_rounds_and_x1hash:
add r10,QWORD[((0+0))+r8*1+rsi]
adc r11,QWORD[((8+0))+r8*1+rsi]
adc r12,1
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
imul r9,r12
add r15,r10
adc r9,rdx
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
$L$open_sse_tail_128_rounds:
add r8,16
paddd xmm0,xmm4
pxor xmm12,xmm0
pshufb xmm12,XMMWORD[$L$rol16]
paddd xmm8,xmm12
pxor xmm4,xmm8
movdqa xmm3,xmm4
pslld xmm3,12
psrld xmm4,20
pxor xmm4,xmm3
paddd xmm0,xmm4
pxor xmm12,xmm0
pshufb xmm12,XMMWORD[$L$rol8]
paddd xmm8,xmm12
pxor xmm4,xmm8
movdqa xmm3,xmm4
pslld xmm3,7
psrld xmm4,25
pxor xmm4,xmm3
DB 102,15,58,15,228,4
DB 102,69,15,58,15,192,8
DB 102,69,15,58,15,228,12
paddd xmm1,xmm5
pxor xmm13,xmm1
pshufb xmm13,XMMWORD[$L$rol16]
paddd xmm9,xmm13
pxor xmm5,xmm9
movdqa xmm3,xmm5
pslld xmm3,12
psrld xmm5,20
pxor xmm5,xmm3
paddd xmm1,xmm5
pxor xmm13,xmm1
pshufb xmm13,XMMWORD[$L$rol8]
paddd xmm9,xmm13
pxor xmm5,xmm9
movdqa xmm3,xmm5
pslld xmm3,7
psrld xmm5,25
pxor xmm5,xmm3
DB 102,15,58,15,237,4
DB 102,69,15,58,15,201,8
DB 102,69,15,58,15,237,12
paddd xmm0,xmm4
pxor xmm12,xmm0
pshufb xmm12,XMMWORD[$L$rol16]
paddd xmm8,xmm12
pxor xmm4,xmm8
movdqa xmm3,xmm4
pslld xmm3,12
psrld xmm4,20
pxor xmm4,xmm3
paddd xmm0,xmm4
pxor xmm12,xmm0
pshufb xmm12,XMMWORD[$L$rol8]
paddd xmm8,xmm12
pxor xmm4,xmm8
movdqa xmm3,xmm4
pslld xmm3,7
psrld xmm4,25
pxor xmm4,xmm3
DB 102,15,58,15,228,12
DB 102,69,15,58,15,192,8
DB 102,69,15,58,15,228,4
paddd xmm1,xmm5
pxor xmm13,xmm1
pshufb xmm13,XMMWORD[$L$rol16]
paddd xmm9,xmm13
pxor xmm5,xmm9
movdqa xmm3,xmm5
pslld xmm3,12
psrld xmm5,20
pxor xmm5,xmm3
paddd xmm1,xmm5
pxor xmm13,xmm1
pshufb xmm13,XMMWORD[$L$rol8]
paddd xmm9,xmm13
pxor xmm5,xmm9
movdqa xmm3,xmm5
pslld xmm3,7
psrld xmm5,25
pxor xmm5,xmm3
DB 102,15,58,15,237,12
DB 102,69,15,58,15,201,8
DB 102,69,15,58,15,237,4
cmp r8,rcx
jb NEAR $L$open_sse_tail_128_rounds_and_x1hash
cmp r8,10*16
jne NEAR $L$open_sse_tail_128_rounds
paddd xmm1,XMMWORD[$L$chacha20_consts]
paddd xmm5,XMMWORD[((160+48))+rbp]
paddd xmm9,XMMWORD[((160+64))+rbp]
paddd xmm13,XMMWORD[((160+112))+rbp]
paddd xmm0,XMMWORD[$L$chacha20_consts]
paddd xmm4,XMMWORD[((160+48))+rbp]
paddd xmm8,XMMWORD[((160+64))+rbp]
paddd xmm12,XMMWORD[((160+96))+rbp]
movdqu xmm3,XMMWORD[((0 + 0))+rsi]
movdqu xmm7,XMMWORD[((16 + 0))+rsi]
movdqu xmm11,XMMWORD[((32 + 0))+rsi]
movdqu xmm15,XMMWORD[((48 + 0))+rsi]
pxor xmm1,xmm3
pxor xmm5,xmm7
pxor xmm9,xmm11
pxor xmm15,xmm13
movdqu XMMWORD[(0 + 0)+rdi],xmm1
movdqu XMMWORD[(16 + 0)+rdi],xmm5
movdqu XMMWORD[(32 + 0)+rdi],xmm9
movdqu XMMWORD[(48 + 0)+rdi],xmm15
sub rbx,4*16
lea rsi,[64+rsi]
lea rdi,[64+rdi]
jmp NEAR $L$open_sse_tail_64_dec_loop
$L$open_sse_tail_192:
movdqa xmm0,XMMWORD[$L$chacha20_consts]
movdqa xmm4,XMMWORD[((160+48))+rbp]
movdqa xmm8,XMMWORD[((160+64))+rbp]
movdqa xmm1,xmm0
movdqa xmm5,xmm4
movdqa xmm9,xmm8
movdqa xmm2,xmm0
movdqa xmm6,xmm4
movdqa xmm10,xmm8
movdqa xmm14,XMMWORD[((160+96))+rbp]
paddd xmm14,XMMWORD[$L$sse_inc]
movdqa xmm13,xmm14
paddd xmm13,XMMWORD[$L$sse_inc]
movdqa xmm12,xmm13
paddd xmm12,XMMWORD[$L$sse_inc]
movdqa XMMWORD[(160+96)+rbp],xmm12
movdqa XMMWORD[(160+112)+rbp],xmm13
movdqa XMMWORD[(160+128)+rbp],xmm14
mov rcx,rbx
mov r8,10*16
cmp rcx,10*16
cmovg rcx,r8
and rcx,-16
xor r8,r8
$L$open_sse_tail_192_rounds_and_x1hash:
add r10,QWORD[((0+0))+r8*1+rsi]
adc r11,QWORD[((8+0))+r8*1+rsi]
adc r12,1
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
imul r9,r12
add r15,r10
adc r9,rdx
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
$L$open_sse_tail_192_rounds:
add r8,16
paddd xmm0,xmm4
pxor xmm12,xmm0
pshufb xmm12,XMMWORD[$L$rol16]
paddd xmm8,xmm12
pxor xmm4,xmm8
movdqa xmm3,xmm4
pslld xmm3,12
psrld xmm4,20
pxor xmm4,xmm3
paddd xmm0,xmm4
pxor xmm12,xmm0
pshufb xmm12,XMMWORD[$L$rol8]
paddd xmm8,xmm12
pxor xmm4,xmm8
movdqa xmm3,xmm4
pslld xmm3,7
psrld xmm4,25
pxor xmm4,xmm3
DB 102,15,58,15,228,4
DB 102,69,15,58,15,192,8
DB 102,69,15,58,15,228,12
paddd xmm1,xmm5
pxor xmm13,xmm1
pshufb xmm13,XMMWORD[$L$rol16]
paddd xmm9,xmm13
pxor xmm5,xmm9
movdqa xmm3,xmm5
pslld xmm3,12
psrld xmm5,20
pxor xmm5,xmm3
paddd xmm1,xmm5
pxor xmm13,xmm1
pshufb xmm13,XMMWORD[$L$rol8]
paddd xmm9,xmm13
pxor xmm5,xmm9
movdqa xmm3,xmm5
pslld xmm3,7
psrld xmm5,25
pxor xmm5,xmm3
DB 102,15,58,15,237,4
DB 102,69,15,58,15,201,8
DB 102,69,15,58,15,237,12
paddd xmm2,xmm6
pxor xmm14,xmm2
pshufb xmm14,XMMWORD[$L$rol16]
paddd xmm10,xmm14
pxor xmm6,xmm10
movdqa xmm3,xmm6
pslld xmm3,12
psrld xmm6,20
pxor xmm6,xmm3
paddd xmm2,xmm6
pxor xmm14,xmm2
pshufb xmm14,XMMWORD[$L$rol8]
paddd xmm10,xmm14
pxor xmm6,xmm10
movdqa xmm3,xmm6
pslld xmm3,7
psrld xmm6,25
pxor xmm6,xmm3
DB 102,15,58,15,246,4
DB 102,69,15,58,15,210,8
DB 102,69,15,58,15,246,12
paddd xmm0,xmm4
pxor xmm12,xmm0
pshufb xmm12,XMMWORD[$L$rol16]
paddd xmm8,xmm12
pxor xmm4,xmm8
movdqa xmm3,xmm4
pslld xmm3,12
psrld xmm4,20
pxor xmm4,xmm3
paddd xmm0,xmm4
pxor xmm12,xmm0
pshufb xmm12,XMMWORD[$L$rol8]
paddd xmm8,xmm12
pxor xmm4,xmm8
movdqa xmm3,xmm4
pslld xmm3,7
psrld xmm4,25
pxor xmm4,xmm3
DB 102,15,58,15,228,12
DB 102,69,15,58,15,192,8
DB 102,69,15,58,15,228,4
paddd xmm1,xmm5
pxor xmm13,xmm1
pshufb xmm13,XMMWORD[$L$rol16]
paddd xmm9,xmm13
pxor xmm5,xmm9
movdqa xmm3,xmm5
pslld xmm3,12
psrld xmm5,20
pxor xmm5,xmm3
paddd xmm1,xmm5
pxor xmm13,xmm1
pshufb xmm13,XMMWORD[$L$rol8]
paddd xmm9,xmm13
pxor xmm5,xmm9
movdqa xmm3,xmm5
pslld xmm3,7
psrld xmm5,25
pxor xmm5,xmm3
DB 102,15,58,15,237,12
DB 102,69,15,58,15,201,8
DB 102,69,15,58,15,237,4
paddd xmm2,xmm6
pxor xmm14,xmm2
pshufb xmm14,XMMWORD[$L$rol16]
paddd xmm10,xmm14
pxor xmm6,xmm10
movdqa xmm3,xmm6
pslld xmm3,12
psrld xmm6,20
pxor xmm6,xmm3
paddd xmm2,xmm6
pxor xmm14,xmm2
pshufb xmm14,XMMWORD[$L$rol8]
paddd xmm10,xmm14
pxor xmm6,xmm10
movdqa xmm3,xmm6
pslld xmm3,7
psrld xmm6,25
pxor xmm6,xmm3
DB 102,15,58,15,246,12
DB 102,69,15,58,15,210,8
DB 102,69,15,58,15,246,4
cmp r8,rcx
jb NEAR $L$open_sse_tail_192_rounds_and_x1hash
cmp r8,10*16
jne NEAR $L$open_sse_tail_192_rounds
cmp rbx,11*16
jb NEAR $L$open_sse_tail_192_finish
add r10,QWORD[((0+160))+rsi]
adc r11,QWORD[((8+160))+rsi]
adc r12,1
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
imul r9,r12
add r15,r10
adc r9,rdx
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
cmp rbx,12*16
jb NEAR $L$open_sse_tail_192_finish
add r10,QWORD[((0+176))+rsi]
adc r11,QWORD[((8+176))+rsi]
adc r12,1
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
imul r9,r12
add r15,r10
adc r9,rdx
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
$L$open_sse_tail_192_finish:
paddd xmm2,XMMWORD[$L$chacha20_consts]
paddd xmm6,XMMWORD[((160+48))+rbp]
paddd xmm10,XMMWORD[((160+64))+rbp]
paddd xmm14,XMMWORD[((160+128))+rbp]
paddd xmm1,XMMWORD[$L$chacha20_consts]
paddd xmm5,XMMWORD[((160+48))+rbp]
paddd xmm9,XMMWORD[((160+64))+rbp]
paddd xmm13,XMMWORD[((160+112))+rbp]
paddd xmm0,XMMWORD[$L$chacha20_consts]
paddd xmm4,XMMWORD[((160+48))+rbp]
paddd xmm8,XMMWORD[((160+64))+rbp]
paddd xmm12,XMMWORD[((160+96))+rbp]
movdqu xmm3,XMMWORD[((0 + 0))+rsi]
movdqu xmm7,XMMWORD[((16 + 0))+rsi]
movdqu xmm11,XMMWORD[((32 + 0))+rsi]
movdqu xmm15,XMMWORD[((48 + 0))+rsi]
pxor xmm2,xmm3
pxor xmm6,xmm7
pxor xmm10,xmm11
pxor xmm15,xmm14
movdqu XMMWORD[(0 + 0)+rdi],xmm2
movdqu XMMWORD[(16 + 0)+rdi],xmm6
movdqu XMMWORD[(32 + 0)+rdi],xmm10
movdqu XMMWORD[(48 + 0)+rdi],xmm15
movdqu xmm3,XMMWORD[((0 + 64))+rsi]
movdqu xmm7,XMMWORD[((16 + 64))+rsi]
movdqu xmm11,XMMWORD[((32 + 64))+rsi]
movdqu xmm15,XMMWORD[((48 + 64))+rsi]
pxor xmm1,xmm3
pxor xmm5,xmm7
pxor xmm9,xmm11
pxor xmm15,xmm13
movdqu XMMWORD[(0 + 64)+rdi],xmm1
movdqu XMMWORD[(16 + 64)+rdi],xmm5
movdqu XMMWORD[(32 + 64)+rdi],xmm9
movdqu XMMWORD[(48 + 64)+rdi],xmm15
sub rbx,8*16
lea rsi,[128+rsi]
lea rdi,[128+rdi]
jmp NEAR $L$open_sse_tail_64_dec_loop
$L$open_sse_tail_256:
movdqa xmm0,XMMWORD[$L$chacha20_consts]
movdqa xmm4,XMMWORD[((160+48))+rbp]
movdqa xmm8,XMMWORD[((160+64))+rbp]
movdqa xmm1,xmm0
movdqa xmm5,xmm4
movdqa xmm9,xmm8
movdqa xmm2,xmm0
movdqa xmm6,xmm4
movdqa xmm10,xmm8
movdqa xmm3,xmm0
movdqa xmm7,xmm4
movdqa xmm11,xmm8
movdqa xmm15,XMMWORD[((160+96))+rbp]
paddd xmm15,XMMWORD[$L$sse_inc]
movdqa xmm14,xmm15
paddd xmm14,XMMWORD[$L$sse_inc]
movdqa xmm13,xmm14
paddd xmm13,XMMWORD[$L$sse_inc]
movdqa xmm12,xmm13
paddd xmm12,XMMWORD[$L$sse_inc]
movdqa XMMWORD[(160+96)+rbp],xmm12
movdqa XMMWORD[(160+112)+rbp],xmm13
movdqa XMMWORD[(160+128)+rbp],xmm14
movdqa XMMWORD[(160+144)+rbp],xmm15
xor r8,r8
$L$open_sse_tail_256_rounds_and_x1hash:
add r10,QWORD[((0+0))+r8*1+rsi]
adc r11,QWORD[((8+0))+r8*1+rsi]
adc r12,1
movdqa XMMWORD[(160+80)+rbp],xmm11
paddd xmm0,xmm4
pxor xmm12,xmm0
pshufb xmm12,XMMWORD[$L$rol16]
paddd xmm8,xmm12
pxor xmm4,xmm8
movdqa xmm11,xmm4
pslld xmm11,12
psrld xmm4,20
pxor xmm4,xmm11
paddd xmm0,xmm4
pxor xmm12,xmm0
pshufb xmm12,XMMWORD[$L$rol8]
paddd xmm8,xmm12
pxor xmm4,xmm8
movdqa xmm11,xmm4
pslld xmm11,7
psrld xmm4,25
pxor xmm4,xmm11
DB 102,15,58,15,228,4
DB 102,69,15,58,15,192,8
DB 102,69,15,58,15,228,12
paddd xmm1,xmm5
pxor xmm13,xmm1
pshufb xmm13,XMMWORD[$L$rol16]
paddd xmm9,xmm13
pxor xmm5,xmm9
movdqa xmm11,xmm5
pslld xmm11,12
psrld xmm5,20
pxor xmm5,xmm11
paddd xmm1,xmm5
pxor xmm13,xmm1
pshufb xmm13,XMMWORD[$L$rol8]
paddd xmm9,xmm13
pxor xmm5,xmm9
movdqa xmm11,xmm5
pslld xmm11,7
psrld xmm5,25
pxor xmm5,xmm11
DB 102,15,58,15,237,4
DB 102,69,15,58,15,201,8
DB 102,69,15,58,15,237,12
paddd xmm2,xmm6
pxor xmm14,xmm2
pshufb xmm14,XMMWORD[$L$rol16]
paddd xmm10,xmm14
pxor xmm6,xmm10
movdqa xmm11,xmm6
pslld xmm11,12
psrld xmm6,20
pxor xmm6,xmm11
paddd xmm2,xmm6
pxor xmm14,xmm2
pshufb xmm14,XMMWORD[$L$rol8]
paddd xmm10,xmm14
pxor xmm6,xmm10
movdqa xmm11,xmm6
pslld xmm11,7
psrld xmm6,25
pxor xmm6,xmm11
DB 102,15,58,15,246,4
DB 102,69,15,58,15,210,8
DB 102,69,15,58,15,246,12
movdqa xmm11,XMMWORD[((160+80))+rbp]
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
movdqa XMMWORD[(160+80)+rbp],xmm9
paddd xmm3,xmm7
pxor xmm15,xmm3
pshufb xmm15,XMMWORD[$L$rol16]
paddd xmm11,xmm15
pxor xmm7,xmm11
movdqa xmm9,xmm7
pslld xmm9,12
psrld xmm7,20
pxor xmm7,xmm9
paddd xmm3,xmm7
pxor xmm15,xmm3
pshufb xmm15,XMMWORD[$L$rol8]
paddd xmm11,xmm15
pxor xmm7,xmm11
movdqa xmm9,xmm7
pslld xmm9,7
psrld xmm7,25
pxor xmm7,xmm9
DB 102,15,58,15,255,4
DB 102,69,15,58,15,219,8
DB 102,69,15,58,15,255,12
movdqa xmm9,XMMWORD[((160+80))+rbp]
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
movdqa XMMWORD[(160+80)+rbp],xmm11
paddd xmm0,xmm4
pxor xmm12,xmm0
pshufb xmm12,XMMWORD[$L$rol16]
paddd xmm8,xmm12
pxor xmm4,xmm8
movdqa xmm11,xmm4
pslld xmm11,12
psrld xmm4,20
pxor xmm4,xmm11
paddd xmm0,xmm4
pxor xmm12,xmm0
pshufb xmm12,XMMWORD[$L$rol8]
paddd xmm8,xmm12
pxor xmm4,xmm8
movdqa xmm11,xmm4
pslld xmm11,7
psrld xmm4,25
pxor xmm4,xmm11
DB 102,15,58,15,228,12
DB 102,69,15,58,15,192,8
DB 102,69,15,58,15,228,4
paddd xmm1,xmm5
pxor xmm13,xmm1
pshufb xmm13,XMMWORD[$L$rol16]
paddd xmm9,xmm13
pxor xmm5,xmm9
movdqa xmm11,xmm5
pslld xmm11,12
psrld xmm5,20
pxor xmm5,xmm11
paddd xmm1,xmm5
pxor xmm13,xmm1
pshufb xmm13,XMMWORD[$L$rol8]
paddd xmm9,xmm13
pxor xmm5,xmm9
movdqa xmm11,xmm5
pslld xmm11,7
psrld xmm5,25
pxor xmm5,xmm11
DB 102,15,58,15,237,12
DB 102,69,15,58,15,201,8
DB 102,69,15,58,15,237,4
imul r9,r12
add r15,r10
adc r9,rdx
paddd xmm2,xmm6
pxor xmm14,xmm2
pshufb xmm14,XMMWORD[$L$rol16]
paddd xmm10,xmm14
pxor xmm6,xmm10
movdqa xmm11,xmm6
pslld xmm11,12
psrld xmm6,20
pxor xmm6,xmm11
paddd xmm2,xmm6
pxor xmm14,xmm2
pshufb xmm14,XMMWORD[$L$rol8]
paddd xmm10,xmm14
pxor xmm6,xmm10
movdqa xmm11,xmm6
pslld xmm11,7
psrld xmm6,25
pxor xmm6,xmm11
DB 102,15,58,15,246,12
DB 102,69,15,58,15,210,8
DB 102,69,15,58,15,246,4
movdqa xmm11,XMMWORD[((160+80))+rbp]
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
movdqa XMMWORD[(160+80)+rbp],xmm9
paddd xmm3,xmm7
pxor xmm15,xmm3
pshufb xmm15,XMMWORD[$L$rol16]
paddd xmm11,xmm15
pxor xmm7,xmm11
movdqa xmm9,xmm7
pslld xmm9,12
psrld xmm7,20
pxor xmm7,xmm9
paddd xmm3,xmm7
pxor xmm15,xmm3
pshufb xmm15,XMMWORD[$L$rol8]
paddd xmm11,xmm15
pxor xmm7,xmm11
movdqa xmm9,xmm7
pslld xmm9,7
psrld xmm7,25
pxor xmm7,xmm9
DB 102,15,58,15,255,12
DB 102,69,15,58,15,219,8
DB 102,69,15,58,15,255,4
movdqa xmm9,XMMWORD[((160+80))+rbp]
add r8,16
cmp r8,10*16
jb NEAR $L$open_sse_tail_256_rounds_and_x1hash
mov rcx,rbx
and rcx,-16
; Hash-only loop of the 256-byte tail. Continues from the byte offset r8 left
; by the preceding round loop and absorbs 16-byte blocks of input at rsi+r8
; into the Poly1305 accumulator h = r12:r11:r10 until r8 reaches rcx
; (rcx = rbx rounded down to a multiple of 16, set just above).
; The bound is tested at the bottom, so the body runs at least once.
$L$open_sse_tail_256_hash:
; h += 16-byte block, plus 1 in the top limb (the 2^128 padding bit)
add r10,QWORD[((0+0))+r8*1+rsi]
adc r11,QWORD[((8+0))+r8*1+rsi]
adc r12,1
; h *= r, r being the two qwords at [rbp+160] (low) and [rbp+168] (high).
; The product is accumulated in r9:r15:r14:r13.
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
imul r9,r12
add r15,r10
adc r9,rdx
; Partial reduction mod 2^130-5: keep the low 130 bits (r13, r14, r15&3) and
; add 5*c, where c is everything above bit 130. 5*c is formed as
; (r9:r15 & ~3) + (r9:r15 >> 2), i.e. 4*c + c.
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
; next block; loop while offset < rcx (unsigned)
add r8,16
cmp r8,rcx
jb NEAR $L$open_sse_tail_256_hash
; Finalize the four working states by adding back the saved input words:
; row 0 from $L$chacha20_consts, rows 1/2 from rbp+160+48 / rbp+160+64, and
; row 3 from a per-block slot (rbp+160+144, +128, +112, +96).
paddd xmm3,XMMWORD[$L$chacha20_consts]
paddd xmm7,XMMWORD[((160+48))+rbp]
paddd xmm11,XMMWORD[((160+64))+rbp]
paddd xmm15,XMMWORD[((160+144))+rbp]
paddd xmm2,XMMWORD[$L$chacha20_consts]
paddd xmm6,XMMWORD[((160+48))+rbp]
paddd xmm10,XMMWORD[((160+64))+rbp]
paddd xmm14,XMMWORD[((160+128))+rbp]
paddd xmm1,XMMWORD[$L$chacha20_consts]
paddd xmm5,XMMWORD[((160+48))+rbp]
paddd xmm9,XMMWORD[((160+64))+rbp]
paddd xmm13,XMMWORD[((160+112))+rbp]
paddd xmm0,XMMWORD[$L$chacha20_consts]
paddd xmm4,XMMWORD[((160+48))+rbp]
paddd xmm8,XMMWORD[((160+64))+rbp]
paddd xmm12,XMMWORD[((160+96))+rbp]
; Bytes 0..63: output = input XOR xmm3/xmm7/xmm11/xmm15.
; All 16 xmm registers hold keystream here, so xmm12 is parked at rbp+160+80
; and used as the scratch register; it is reloaded after the last XOR below.
movdqa XMMWORD[(160+80)+rbp],xmm12
movdqu xmm12,XMMWORD[((0 + 0))+rsi]
pxor xmm12,xmm3
movdqu XMMWORD[(0 + 0)+rdi],xmm12
movdqu xmm12,XMMWORD[((16 + 0))+rsi]
pxor xmm12,xmm7
movdqu XMMWORD[(16 + 0)+rdi],xmm12
movdqu xmm12,XMMWORD[((32 + 0))+rsi]
pxor xmm12,xmm11
movdqu XMMWORD[(32 + 0)+rdi],xmm12
movdqu xmm12,XMMWORD[((48 + 0))+rsi]
pxor xmm12,xmm15
movdqu XMMWORD[(48 + 0)+rdi],xmm12
; Bytes 64..127: XOR with xmm2/xmm6/xmm10/xmm14 (xmm3/7/11/15 are free now and
; carry the input).
movdqu xmm3,XMMWORD[((0 + 64))+rsi]
movdqu xmm7,XMMWORD[((16 + 64))+rsi]
movdqu xmm11,XMMWORD[((32 + 64))+rsi]
movdqu xmm15,XMMWORD[((48 + 64))+rsi]
pxor xmm2,xmm3
pxor xmm6,xmm7
pxor xmm10,xmm11
pxor xmm15,xmm14
movdqu XMMWORD[(0 + 64)+rdi],xmm2
movdqu XMMWORD[(16 + 64)+rdi],xmm6
movdqu XMMWORD[(32 + 64)+rdi],xmm10
movdqu XMMWORD[(48 + 64)+rdi],xmm15
; Bytes 128..191: XOR with xmm1/xmm5/xmm9/xmm13.
movdqu xmm3,XMMWORD[((0 + 128))+rsi]
movdqu xmm7,XMMWORD[((16 + 128))+rsi]
movdqu xmm11,XMMWORD[((32 + 128))+rsi]
movdqu xmm15,XMMWORD[((48 + 128))+rsi]
pxor xmm1,xmm3
pxor xmm5,xmm7
pxor xmm9,xmm11
pxor xmm15,xmm13
movdqu XMMWORD[(0 + 128)+rdi],xmm1
movdqu XMMWORD[(16 + 128)+rdi],xmm5
movdqu XMMWORD[(32 + 128)+rdi],xmm9
movdqu XMMWORD[(48 + 128)+rdi],xmm15
; Restore xmm12. The last 64 keystream bytes (xmm0, xmm4, xmm8, xmm12) are
; consumed by the 16-byte loop that follows.
movdqa xmm12,XMMWORD[((160+80))+rbp]
; 192 bytes processed: shrink the remaining length, advance both pointers.
sub rbx,12*16
lea rsi,[192+rsi]
lea rdi,[192+rdi]
; Decrypt whole 16-byte chunks while rbx >= 16, using up to 64 bytes of
; keystream held in xmm0, xmm4, xmm8, xmm12 (consumed in that order).
; After each chunk the registers are shifted down one place so that xmm0
; always holds the next 16 keystream bytes. No hashing happens in this loop.
$L$open_sse_tail_64_dec_loop:
cmp rbx,16
; fewer than 16 bytes left (unsigned): go handle the partial block
jb NEAR $L$open_sse_tail_16_init
sub rbx,16
; output[0..15] = input[0..15] XOR xmm0
movdqu xmm3,XMMWORD[rsi]
pxor xmm0,xmm3
movdqu XMMWORD[rdi],xmm0
lea rsi,[16+rsi]
lea rdi,[16+rdi]
; shift the keystream queue: xmm0 <- xmm4 <- xmm8 <- xmm12
movdqa xmm0,xmm4
movdqa xmm4,xmm8
movdqa xmm8,xmm12
jmp NEAR $L$open_sse_tail_64_dec_loop
; Entry from the 64-byte loop: the next keystream bytes are in xmm0, but the
; shared tail code below expects them in xmm1.
$L$open_sse_tail_16_init:
movdqa xmm1,xmm0
; Final partial block: rbx (< 16 on the visible paths) bytes remain at rsi and
; the keystream for them is in xmm1. Also entered directly from
; $L$open_sse_128_xor_hash.
$L$open_sse_tail_16:
test rbx,rbx
; nothing left: go straight to tag finalization
jz NEAR $L$open_sse_finalize
; Assemble the rbx input bytes into xmm3, zero-padded at the top. Bytes are
; read one at a time from the last (rsi+rbx-1) down to the first, each one
; inserted at lane 0 after shifting the register up by a byte, so exactly
; rbx bytes are loaded and they end up in memory order.
pxor xmm3,xmm3
lea rsi,[((-1))+rbx*1+rsi]
mov r8,rbx
$L$open_sse_tail_16_compose:
pslldq xmm3,1
pinsrb xmm3,BYTE[rsi],0
sub rsi,1
sub r8,1
jnz NEAR $L$open_sse_tail_16_compose
; Keep the padded input block as two limbs for hashing before it is XORed:
; r13 = low qword (hand-encoded movq r13,xmm3), r14 = high qword.
DB 102,73,15,126,221
pextrq r14,xmm3,1
; xmm3 = input XOR keystream
pxor xmm3,xmm1
; Write the rbx result bytes to rdi one at a time (XMMWORD is %defined to
; nothing in this file, so the store below is a plain byte store).
$L$open_sse_tail_16_extract:
pextrb XMMWORD[rdi],xmm3,0
psrldq xmm3,1
add rdi,1
sub rbx,1
jne NEAR $L$open_sse_tail_16_extract
; Absorb the zero-padded input block into the Poly1305 accumulator
; h = r12:r11:r10, with 1 added into the top limb (2^128 padding bit).
add r10,r13
adc r11,r14
adc r12,1
; h *= r, r read from [rbp+160] (low) / [rbp+168] (high);
; product accumulated in r9:r15:r14:r13.
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
imul r9,r12
add r15,r10
adc r9,rdx
; Partial reduction mod 2^130-5: low 130 bits plus 5*c, with 5*c computed as
; (r9:r15 & ~3) + (r9:r15 >> 2).
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
; Tag finalization and function epilogue.
; Absorbs one last 16-byte block taken from [rbp+160+32], fully reduces the
; accumulator, adds the 128-bit value at [rbp+160+16], stores the 16-byte
; result through a pointer popped from the stack, then restores callee-saved
; state and returns.
$L$open_sse_finalize:
; h += the two qwords at rbp+160+32 / rbp+160+40, plus the 2^128 padding bit.
; NOTE(review): the seal prologue stores lengths in these slots; presumably
; the open prologue (outside this chunk) does the same - confirm.
add r10,QWORD[((0+160+32))+rbp]
adc r11,QWORD[((8+160+32))+rbp]
adc r12,1
; h *= r, r read from [rbp+160] (low) / [rbp+168] (high);
; product accumulated in r9:r15:r14:r13.
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
imul r9,r12
add r15,r10
adc r9,rdx
; Partial reduction mod 2^130-5: low 130 bits plus 5*c, with 5*c computed as
; (r9:r15 & ~3) + (r9:r15 >> 2).
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
; Final conditional subtraction of p = 2^130-5, whose limbs are
; (2^64-5, 2^64-1, 3): compute h - p; if that borrows (CF set) h was already
; below p, so the saved copy is moved back with cmov (no branch).
mov r13,r10
mov r14,r11
mov r15,r12
sub r10,-5
sbb r11,-1
sbb r12,3
cmovc r10,r13
cmovc r11,r14
cmovc r12,r15
; tag = (h + 128-bit value at rbp+160+16) mod 2^128; the carry out of r11 is
; intentionally dropped.
add r10,QWORD[((0+160+16))+rbp]
adc r11,QWORD[((8+160+16))+rbp]
; Restore xmm6-xmm15 (callee-saved under the Win64 ABI) from rbp+0..rbp+159.
movaps xmm6,XMMWORD[((0+0))+rbp]
movaps xmm7,XMMWORD[((16+0))+rbp]
movaps xmm8,XMMWORD[((32+0))+rbp]
movaps xmm9,XMMWORD[((48+0))+rbp]
movaps xmm10,XMMWORD[((64+0))+rbp]
movaps xmm11,XMMWORD[((80+0))+rbp]
movaps xmm12,XMMWORD[((96+0))+rbp]
movaps xmm13,XMMWORD[((112+0))+rbp]
movaps xmm14,XMMWORD[((128+0))+rbp]
movaps xmm15,XMMWORD[((144+0))+rbp]
; Release the local frame.
add rsp,288 + 160 + 32
; Pop the pointer that sits on top of the saved registers and write the
; 16-byte tag through it.
; NOTE(review): presumably the r9 argument pushed by the prologue, as in the
; seal entry point - the open prologue is outside this chunk.
pop r9
mov QWORD[r9],r10
mov QWORD[8+r9],r11
; Restore callee-saved general-purpose registers.
pop r15
pop r14
pop r13
pop r12
pop rbx
pop rbp
; rdi/rsi are callee-saved on Win64; reload them from the two slots just
; above the return address.
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
ret
; Short-input path of the open (decrypt) routine.
; Runs three ChaCha20 blocks side by side:
;   block 0: xmm0 / xmm4 / xmm8  / xmm12
;   block 1: xmm1 / xmm5 / xmm9  / xmm13
;   block 2: xmm2 / xmm6 / xmm10 / xmm14
; Rows 1-3 of the input state come from [r9], [r9+16], [r9+32]; row 0 is
; $L$chacha20_consts. Block 1 uses row 3 + $L$sse_inc, block 2 adds it twice.
; Only xmm0/xmm4 of block 0 are used afterwards (as the Poly1305 r and s
; values); blocks 1 and 2 provide the keystream.
$L$open_sse_128:
movdqu xmm0,XMMWORD[$L$chacha20_consts]
movdqa xmm1,xmm0
movdqa xmm2,xmm0
movdqu xmm4,XMMWORD[r9]
movdqa xmm5,xmm4
movdqa xmm6,xmm4
movdqu xmm8,XMMWORD[16+r9]
movdqa xmm9,xmm8
movdqa xmm10,xmm8
movdqu xmm12,XMMWORD[32+r9]
movdqa xmm13,xmm12
paddd xmm13,XMMWORD[$L$sse_inc]
movdqa xmm14,xmm13
paddd xmm14,XMMWORD[$L$sse_inc]
; Keep copies of the input rows for the final add-back:
; xmm7 = row 1, xmm11 = row 2, xmm15 = row 3 of block 1.
movdqa xmm7,xmm4
movdqa xmm11,xmm8
movdqa xmm15,xmm13
; r10 = loop counter: 10 iterations, each a column round plus a diagonal round
mov r10,10
$L$open_sse_128_rounds:
; Column quarter-round, block 0 (a=xmm0 b=xmm4 c=xmm8 d=xmm12, xmm3 scratch):
; a+=b; d^=a; d<<<=16; c+=d; b^=c; b<<<=12;
; a+=b; d^=a; d<<<=8;  c+=d; b^=c; b<<<=7
; (the 16- and 8-bit rotates are byte shuffles via $L$rol16 / $L$rol8).
paddd xmm0,xmm4
pxor xmm12,xmm0
pshufb xmm12,XMMWORD[$L$rol16]
paddd xmm8,xmm12
pxor xmm4,xmm8
movdqa xmm3,xmm4
pslld xmm3,12
psrld xmm4,20
pxor xmm4,xmm3
paddd xmm0,xmm4
pxor xmm12,xmm0
pshufb xmm12,XMMWORD[$L$rol8]
paddd xmm8,xmm12
pxor xmm4,xmm8
movdqa xmm3,xmm4
pslld xmm3,7
psrld xmm4,25
pxor xmm4,xmm3
; Rotate rows into diagonal position (hand-encoded):
; palignr xmm4,xmm4,4 / palignr xmm8,xmm8,8 / palignr xmm12,xmm12,12
DB 102,15,58,15,228,4
DB 102,69,15,58,15,192,8
DB 102,69,15,58,15,228,12
; Column quarter-round, block 1 (xmm1 / xmm5 / xmm9 / xmm13).
paddd xmm1,xmm5
pxor xmm13,xmm1
pshufb xmm13,XMMWORD[$L$rol16]
paddd xmm9,xmm13
pxor xmm5,xmm9
movdqa xmm3,xmm5
pslld xmm3,12
psrld xmm5,20
pxor xmm5,xmm3
paddd xmm1,xmm5
pxor xmm13,xmm1
pshufb xmm13,XMMWORD[$L$rol8]
paddd xmm9,xmm13
pxor xmm5,xmm9
movdqa xmm3,xmm5
pslld xmm3,7
psrld xmm5,25
pxor xmm5,xmm3
; palignr xmm5,xmm5,4 / palignr xmm9,xmm9,8 / palignr xmm13,xmm13,12
DB 102,15,58,15,237,4
DB 102,69,15,58,15,201,8
DB 102,69,15,58,15,237,12
; Column quarter-round, block 2 (xmm2 / xmm6 / xmm10 / xmm14).
paddd xmm2,xmm6
pxor xmm14,xmm2
pshufb xmm14,XMMWORD[$L$rol16]
paddd xmm10,xmm14
pxor xmm6,xmm10
movdqa xmm3,xmm6
pslld xmm3,12
psrld xmm6,20
pxor xmm6,xmm3
paddd xmm2,xmm6
pxor xmm14,xmm2
pshufb xmm14,XMMWORD[$L$rol8]
paddd xmm10,xmm14
pxor xmm6,xmm10
movdqa xmm3,xmm6
pslld xmm3,7
psrld xmm6,25
pxor xmm6,xmm3
; palignr xmm6,xmm6,4 / palignr xmm10,xmm10,8 / palignr xmm14,xmm14,12
DB 102,15,58,15,246,4
DB 102,69,15,58,15,210,8
DB 102,69,15,58,15,246,12
; Diagonal quarter-round, block 0 (same arithmetic on the rotated rows).
paddd xmm0,xmm4
pxor xmm12,xmm0
pshufb xmm12,XMMWORD[$L$rol16]
paddd xmm8,xmm12
pxor xmm4,xmm8
movdqa xmm3,xmm4
pslld xmm3,12
psrld xmm4,20
pxor xmm4,xmm3
paddd xmm0,xmm4
pxor xmm12,xmm0
pshufb xmm12,XMMWORD[$L$rol8]
paddd xmm8,xmm12
pxor xmm4,xmm8
movdqa xmm3,xmm4
pslld xmm3,7
psrld xmm4,25
pxor xmm4,xmm3
; Rotate rows back to column position:
; palignr xmm4,xmm4,12 / palignr xmm8,xmm8,8 / palignr xmm12,xmm12,4
DB 102,15,58,15,228,12
DB 102,69,15,58,15,192,8
DB 102,69,15,58,15,228,4
; Diagonal quarter-round, block 1.
paddd xmm1,xmm5
pxor xmm13,xmm1
pshufb xmm13,XMMWORD[$L$rol16]
paddd xmm9,xmm13
pxor xmm5,xmm9
movdqa xmm3,xmm5
pslld xmm3,12
psrld xmm5,20
pxor xmm5,xmm3
paddd xmm1,xmm5
pxor xmm13,xmm1
pshufb xmm13,XMMWORD[$L$rol8]
paddd xmm9,xmm13
pxor xmm5,xmm9
movdqa xmm3,xmm5
pslld xmm3,7
psrld xmm5,25
pxor xmm5,xmm3
; palignr xmm5,xmm5,12 / palignr xmm9,xmm9,8 / palignr xmm13,xmm13,4
DB 102,15,58,15,237,12
DB 102,69,15,58,15,201,8
DB 102,69,15,58,15,237,4
; Diagonal quarter-round, block 2.
paddd xmm2,xmm6
pxor xmm14,xmm2
pshufb xmm14,XMMWORD[$L$rol16]
paddd xmm10,xmm14
pxor xmm6,xmm10
movdqa xmm3,xmm6
pslld xmm3,12
psrld xmm6,20
pxor xmm6,xmm3
paddd xmm2,xmm6
pxor xmm14,xmm2
pshufb xmm14,XMMWORD[$L$rol8]
paddd xmm10,xmm14
pxor xmm6,xmm10
movdqa xmm3,xmm6
pslld xmm3,7
psrld xmm6,25
pxor xmm6,xmm3
; palignr xmm6,xmm6,12 / palignr xmm10,xmm10,8 / palignr xmm14,xmm14,4
DB 102,15,58,15,246,12
DB 102,69,15,58,15,210,8
DB 102,69,15,58,15,246,4
dec r10
jnz NEAR $L$open_sse_128_rounds
; Add the input state back. Rows 2 and 3 of block 0 (xmm8, xmm12) are left
; unfinalized because nothing below reads them.
paddd xmm0,XMMWORD[$L$chacha20_consts]
paddd xmm1,XMMWORD[$L$chacha20_consts]
paddd xmm2,XMMWORD[$L$chacha20_consts]
paddd xmm4,xmm7
paddd xmm5,xmm7
paddd xmm6,xmm7
paddd xmm9,xmm11
paddd xmm10,xmm11
; xmm15 holds block 1's row 3; bump it once more to get block 2's row 3.
paddd xmm13,xmm15
paddd xmm15,XMMWORD[$L$sse_inc]
paddd xmm14,xmm15
; Poly1305 key from block 0: r = xmm0 masked with $L$clamp -> rbp+160,
; s = xmm4 -> rbp+160+16.
pand xmm0,XMMWORD[$L$clamp]
movdqa XMMWORD[(160+0)+rbp],xmm0
movdqa XMMWORD[(160+16)+rbp],xmm4
; No-op move emitted by the generator (source and destination are both r8).
mov r8,r8
; poly_hash_ad_internal is defined elsewhere in this file.
; NOTE(review): presumably hashes the additional data and leaves the
; accumulator in r12:r11:r10 - confirm against its definition.
call poly_hash_ad_internal
; Main loop of the short path: for each full 16-byte input block, absorb it
; into the Poly1305 accumulator (r12:r11:r10) and decrypt it with xmm1.
$L$open_sse_128_xor_hash:
cmp rbx,16
; fewer than 16 bytes left (unsigned): shared partial-block code, keystream
; already in xmm1 as that code expects
jb NEAR $L$open_sse_tail_16
sub rbx,16
; h += input block, plus the 2^128 padding bit
add r10,QWORD[((0+0))+rsi]
adc r11,QWORD[((8+0))+rsi]
adc r12,1
; output = input XOR xmm1
movdqu xmm3,XMMWORD[rsi]
pxor xmm1,xmm3
movdqu XMMWORD[rdi],xmm1
lea rsi,[16+rsi]
lea rdi,[16+rdi]
; h *= r, r read from [rbp+160] (low) / [rbp+168] (high);
; product accumulated in r9:r15:r14:r13.
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
imul r9,r12
add r15,r10
adc r9,rdx
; Partial reduction mod 2^130-5: low 130 bits plus 5*c, with 5*c computed as
; (r9:r15 & ~3) + (r9:r15 >> 2).
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
; Shift the keystream queue so xmm1 holds the next 16 bytes:
; xmm1 <- xmm5 <- xmm9 <- xmm13 <- xmm2 <- xmm6 <- xmm10 <- xmm14
movdqa xmm1,xmm5
movdqa xmm5,xmm9
movdqa xmm9,xmm13
movdqa xmm13,xmm2
movdqa xmm2,xmm6
movdqa xmm6,xmm10
movdqa xmm10,xmm14
jmp NEAR $L$open_sse_128_xor_hash
$L$SEH_end_chacha20_poly1305_open:
; chacha20_poly1305_seal - entry point (Win64 ABI), prologue and SSE state
; setup. Incoming arguments are moved into the registers the body uses:
;   rcx -> rdi (output pointer)      rdx -> rsi (input pointer)
;   r8  -> rdx (input length)        r9  -> rcx
;   [rsp+40] -> r8                   [rsp+48] -> r9 (key/state block)
; NOTE(review): the meaning of rcx/r8 (presumably AD pointer and AD length)
; is not established by the code visible here - confirm against the C
; prototype.
global chacha20_poly1305_seal
ALIGN 64
chacha20_poly1305_seal:
; rdi/rsi are callee-saved on Win64: stash them in the two slots just above
; the return address; the epilogue reloads them from there.
mov QWORD[8+rsp],rdi ;WIN64 prologue
mov QWORD[16+rsp],rsi
mov rax,rsp
$L$SEH_begin_chacha20_poly1305_seal:
mov rdi,rcx
mov rsi,rdx
mov rdx,r8
mov rcx,r9
mov r8,QWORD[40+rsp]
mov r9,QWORD[48+rsp]
_CET_ENDBR
; Save callee-saved GPRs, then r9 on top so the finalize code can pop it and
; write the tag through it.
push rbp
push rbx
push r12
push r13
push r14
push r15
push r9
; Local frame; rbp = 32-byte-aligned base inside it.
sub rsp,288 + 160 + 32
lea rbp,[32+rsp]
and rbp,-32
; Save xmm6-xmm15 (callee-saved on Win64) at rbp+0..rbp+159.
movaps XMMWORD[(0+0)+rbp],xmm6
movaps XMMWORD[(16+0)+rbp],xmm7
movaps XMMWORD[(32+0)+rbp],xmm8
movaps XMMWORD[(48+0)+rbp],xmm9
movaps XMMWORD[(64+0)+rbp],xmm10
movaps XMMWORD[(80+0)+rbp],xmm11
movaps XMMWORD[(96+0)+rbp],xmm12
movaps XMMWORD[(112+0)+rbp],xmm13
movaps XMMWORD[(128+0)+rbp],xmm14
movaps XMMWORD[(144+0)+rbp],xmm15
; Build the 16-byte block stored at rbp+160+32: low qword = r8, high qword =
; input length (rdx) + the qword at [r9+56].
; NOTE(review): [r9+56] is presumably an extra-ciphertext length supplied by
; the caller - confirm.
mov rbx,QWORD[56+r9]
add rbx,rdx
mov QWORD[((0+160+32))+rbp],r8
mov QWORD[((8+160+32))+rbp],rbx
; rbx = number of input bytes still to process
mov rbx,rdx
; Take the AVX2 implementation when bits 5 and 8 (mask 288 = 0x120) of the
; dword at OPENSSL_ia32cap_P+8 are both set.
; NOTE(review): presumably the AVX2 and BMI2 CPUID feature bits.
mov eax,DWORD[((OPENSSL_ia32cap_P+8))]
and eax,288
xor eax,288
jz NEAR chacha20_poly1305_seal_avx2
; Inputs of at most 128 bytes use the dedicated short path.
cmp rbx,128
jbe NEAR $L$seal_sse_128
; Set up four ChaCha20 blocks:
;   block 0: xmm0/4/8/12   block 1: xmm1/5/9/13
;   block 2: xmm2/6/10/14  block 3: xmm3/7/11/15
; Row 0 is $L$chacha20_consts, rows 1-3 come from [r9], [r9+16], [r9+32].
movdqa xmm0,XMMWORD[$L$chacha20_consts]
movdqu xmm4,XMMWORD[r9]
movdqu xmm8,XMMWORD[16+r9]
movdqu xmm12,XMMWORD[32+r9]
movdqa xmm1,xmm0
movdqa xmm2,xmm0
movdqa xmm3,xmm0
movdqa xmm5,xmm4
movdqa xmm6,xmm4
movdqa xmm7,xmm4
movdqa xmm9,xmm8
movdqa xmm10,xmm8
movdqa xmm11,xmm8
; Row 3 per block: xmm15 = as loaded, xmm14 = +1, xmm13 = +2, xmm12 = +3
; (each step adds $L$sse_inc).
movdqa xmm15,xmm12
paddd xmm12,XMMWORD[$L$sse_inc]
movdqa xmm14,xmm12
paddd xmm12,XMMWORD[$L$sse_inc]
movdqa xmm13,xmm12
paddd xmm12,XMMWORD[$L$sse_inc]
; Save the input state for the add-back after the rounds:
; rows 1/2 at rbp+160+48 / +64, per-block row 3 at +96 / +112 / +128 / +144.
movdqa XMMWORD[(160+48)+rbp],xmm4
movdqa XMMWORD[(160+64)+rbp],xmm8
movdqa XMMWORD[(160+96)+rbp],xmm12
movdqa XMMWORD[(160+112)+rbp],xmm13
movdqa XMMWORD[(160+128)+rbp],xmm14
movdqa XMMWORD[(160+144)+rbp],xmm15
; r10 = loop counter for the 10 column+diagonal round pairs that follow
mov r10,10
; First four-block ChaCha20 pass of the seal routine (no hashing yet).
; Ten iterations of a column round followed by a diagonal round over all four
; blocks (xmm0-xmm15). Every xmm register holds state, so xmm8 doubles as
; scratch: its value is parked at rbp+160+80 while xmm8 holds a shuffle mask
; or a shift temporary.
$L$seal_sse_init_rounds:
; --- column round ---
; a += b; d ^= a; d <<<= 16 (pshufb with the $L$rol16 mask held in xmm8)
movdqa XMMWORD[(160+80)+rbp],xmm8
movdqa xmm8,XMMWORD[$L$rol16]
paddd xmm3,xmm7
paddd xmm2,xmm6
paddd xmm1,xmm5
paddd xmm0,xmm4
pxor xmm15,xmm3
pxor xmm14,xmm2
pxor xmm13,xmm1
pxor xmm12,xmm0
; hand-encoded pshufb xmm15,xmm8 / xmm14,xmm8 / xmm13,xmm8 / xmm12,xmm8
DB 102,69,15,56,0,248
DB 102,69,15,56,0,240
DB 102,69,15,56,0,232
DB 102,69,15,56,0,224
; c += d; b ^= c
movdqa xmm8,XMMWORD[((160+80))+rbp]
paddd xmm11,xmm15
paddd xmm10,xmm14
paddd xmm9,xmm13
paddd xmm8,xmm12
pxor xmm7,xmm11
pxor xmm6,xmm10
pxor xmm5,xmm9
pxor xmm4,xmm8
; b <<<= 12 (shift right 20, shift left 12, combine), xmm8 as temporary
movdqa XMMWORD[(160+80)+rbp],xmm8
movdqa xmm8,xmm7
psrld xmm8,20
pslld xmm7,32-20
pxor xmm7,xmm8
movdqa xmm8,xmm6
psrld xmm8,20
pslld xmm6,32-20
pxor xmm6,xmm8
movdqa xmm8,xmm5
psrld xmm8,20
pslld xmm5,32-20
pxor xmm5,xmm8
movdqa xmm8,xmm4
psrld xmm8,20
pslld xmm4,32-20
pxor xmm4,xmm8
; a += b; d ^= a; d <<<= 8 (pshufb with the $L$rol8 mask)
movdqa xmm8,XMMWORD[$L$rol8]
paddd xmm3,xmm7
paddd xmm2,xmm6
paddd xmm1,xmm5
paddd xmm0,xmm4
pxor xmm15,xmm3
pxor xmm14,xmm2
pxor xmm13,xmm1
pxor xmm12,xmm0
DB 102,69,15,56,0,248
DB 102,69,15,56,0,240
DB 102,69,15,56,0,232
DB 102,69,15,56,0,224
; c += d; b ^= c
movdqa xmm8,XMMWORD[((160+80))+rbp]
paddd xmm11,xmm15
paddd xmm10,xmm14
paddd xmm9,xmm13
paddd xmm8,xmm12
pxor xmm7,xmm11
pxor xmm6,xmm10
pxor xmm5,xmm9
pxor xmm4,xmm8
; b <<<= 7
movdqa XMMWORD[(160+80)+rbp],xmm8
movdqa xmm8,xmm7
psrld xmm8,25
pslld xmm7,32-25
pxor xmm7,xmm8
movdqa xmm8,xmm6
psrld xmm8,25
pslld xmm6,32-25
pxor xmm6,xmm8
movdqa xmm8,xmm5
psrld xmm8,25
pslld xmm5,32-25
pxor xmm5,xmm8
movdqa xmm8,xmm4
psrld xmm8,25
pslld xmm4,32-25
pxor xmm4,xmm8
movdqa xmm8,XMMWORD[((160+80))+rbp]
; Rotate rows b/c/d of every block by 4/8/12 bytes (hand-encoded palignr) so
; the next quarter-rounds act on the diagonals.
DB 102,15,58,15,255,4
DB 102,69,15,58,15,219,8
DB 102,69,15,58,15,255,12
DB 102,15,58,15,246,4
DB 102,69,15,58,15,210,8
DB 102,69,15,58,15,246,12
DB 102,15,58,15,237,4
DB 102,69,15,58,15,201,8
DB 102,69,15,58,15,237,12
DB 102,15,58,15,228,4
DB 102,69,15,58,15,192,8
DB 102,69,15,58,15,228,12
; --- diagonal round (same sequence as above) ---
movdqa XMMWORD[(160+80)+rbp],xmm8
movdqa xmm8,XMMWORD[$L$rol16]
paddd xmm3,xmm7
paddd xmm2,xmm6
paddd xmm1,xmm5
paddd xmm0,xmm4
pxor xmm15,xmm3
pxor xmm14,xmm2
pxor xmm13,xmm1
pxor xmm12,xmm0
DB 102,69,15,56,0,248
DB 102,69,15,56,0,240
DB 102,69,15,56,0,232
DB 102,69,15,56,0,224
movdqa xmm8,XMMWORD[((160+80))+rbp]
paddd xmm11,xmm15
paddd xmm10,xmm14
paddd xmm9,xmm13
paddd xmm8,xmm12
pxor xmm7,xmm11
pxor xmm6,xmm10
pxor xmm5,xmm9
pxor xmm4,xmm8
movdqa XMMWORD[(160+80)+rbp],xmm8
movdqa xmm8,xmm7
psrld xmm8,20
pslld xmm7,32-20
pxor xmm7,xmm8
movdqa xmm8,xmm6
psrld xmm8,20
pslld xmm6,32-20
pxor xmm6,xmm8
movdqa xmm8,xmm5
psrld xmm8,20
pslld xmm5,32-20
pxor xmm5,xmm8
movdqa xmm8,xmm4
psrld xmm8,20
pslld xmm4,32-20
pxor xmm4,xmm8
movdqa xmm8,XMMWORD[$L$rol8]
paddd xmm3,xmm7
paddd xmm2,xmm6
paddd xmm1,xmm5
paddd xmm0,xmm4
pxor xmm15,xmm3
pxor xmm14,xmm2
pxor xmm13,xmm1
pxor xmm12,xmm0
DB 102,69,15,56,0,248
DB 102,69,15,56,0,240
DB 102,69,15,56,0,232
DB 102,69,15,56,0,224
movdqa xmm8,XMMWORD[((160+80))+rbp]
paddd xmm11,xmm15
paddd xmm10,xmm14
paddd xmm9,xmm13
paddd xmm8,xmm12
pxor xmm7,xmm11
pxor xmm6,xmm10
pxor xmm5,xmm9
pxor xmm4,xmm8
movdqa XMMWORD[(160+80)+rbp],xmm8
movdqa xmm8,xmm7
psrld xmm8,25
pslld xmm7,32-25
pxor xmm7,xmm8
movdqa xmm8,xmm6
psrld xmm8,25
pslld xmm6,32-25
pxor xmm6,xmm8
movdqa xmm8,xmm5
psrld xmm8,25
pslld xmm5,32-25
pxor xmm5,xmm8
movdqa xmm8,xmm4
psrld xmm8,25
pslld xmm4,32-25
pxor xmm4,xmm8
movdqa xmm8,XMMWORD[((160+80))+rbp]
; Rotate rows back to column position: palignr by 12 / 8 / 4 bytes.
DB 102,15,58,15,255,12
DB 102,69,15,58,15,219,8
DB 102,69,15,58,15,255,4
DB 102,15,58,15,246,12
DB 102,69,15,58,15,210,8
DB 102,69,15,58,15,246,4
DB 102,15,58,15,237,12
DB 102,69,15,58,15,201,8
DB 102,69,15,58,15,237,4
DB 102,15,58,15,228,12
DB 102,69,15,58,15,192,8
DB 102,69,15,58,15,228,4
dec r10
jnz NEAR $L$seal_sse_init_rounds
; Add the saved input state back into all four blocks.
paddd xmm3,XMMWORD[$L$chacha20_consts]
paddd xmm7,XMMWORD[((160+48))+rbp]
paddd xmm11,XMMWORD[((160+64))+rbp]
paddd xmm15,XMMWORD[((160+144))+rbp]
paddd xmm2,XMMWORD[$L$chacha20_consts]
paddd xmm6,XMMWORD[((160+48))+rbp]
paddd xmm10,XMMWORD[((160+64))+rbp]
paddd xmm14,XMMWORD[((160+128))+rbp]
paddd xmm1,XMMWORD[$L$chacha20_consts]
paddd xmm5,XMMWORD[((160+48))+rbp]
paddd xmm9,XMMWORD[((160+64))+rbp]
paddd xmm13,XMMWORD[((160+112))+rbp]
paddd xmm0,XMMWORD[$L$chacha20_consts]
paddd xmm4,XMMWORD[((160+48))+rbp]
paddd xmm8,XMMWORD[((160+64))+rbp]
paddd xmm12,XMMWORD[((160+96))+rbp]
; Poly1305 key from block 3 (the block with the unincremented row 3):
; r = xmm3 masked with $L$clamp -> rbp+160, s = xmm7 -> rbp+160+16.
pand xmm3,XMMWORD[$L$clamp]
movdqa XMMWORD[(160+0)+rbp],xmm3
movdqa XMMWORD[(160+16)+rbp],xmm7
; No-op move emitted by the generator (source and destination are both r8).
mov r8,r8
; poly_hash_ad_internal is defined elsewhere in this file.
; NOTE(review): presumably hashes the additional data and leaves the
; accumulator in r12:r11:r10 - confirm against its definition.
call poly_hash_ad_internal
; Encrypt bytes 0..63 with block 2 (xmm2/6/10/14). rdi is deliberately not
; advanced in this section: later code hashes the ciphertext by reading it
; back through rdi.
movdqu xmm3,XMMWORD[((0 + 0))+rsi]
movdqu xmm7,XMMWORD[((16 + 0))+rsi]
movdqu xmm11,XMMWORD[((32 + 0))+rsi]
movdqu xmm15,XMMWORD[((48 + 0))+rsi]
pxor xmm2,xmm3
pxor xmm6,xmm7
pxor xmm10,xmm11
pxor xmm15,xmm14
movdqu XMMWORD[(0 + 0)+rdi],xmm2
movdqu XMMWORD[(16 + 0)+rdi],xmm6
movdqu XMMWORD[(32 + 0)+rdi],xmm10
movdqu XMMWORD[(48 + 0)+rdi],xmm15
; Encrypt bytes 64..127 with block 1 (xmm1/5/9/13).
movdqu xmm3,XMMWORD[((0 + 64))+rsi]
movdqu xmm7,XMMWORD[((16 + 64))+rsi]
movdqu xmm11,XMMWORD[((32 + 64))+rsi]
movdqu xmm15,XMMWORD[((48 + 64))+rsi]
pxor xmm1,xmm3
pxor xmm5,xmm7
pxor xmm9,xmm11
pxor xmm15,xmm13
movdqu XMMWORD[(0 + 64)+rdi],xmm1
movdqu XMMWORD[(16 + 64)+rdi],xmm5
movdqu XMMWORD[(32 + 64)+rdi],xmm9
movdqu XMMWORD[(48 + 64)+rdi],xmm15
; More than 192 bytes in total: block 0 is used in full by the main path.
cmp rbx,12*16
ja NEAR $L$seal_sse_main_init
; Otherwise 128 bytes are done: rcx = 128, rbx -= 128, rsi += 128, and the
; remaining keystream (block 0) is handled by the tail code.
; NOTE(review): rcx presumably tells the tail how many ciphertext bytes at
; rdi are still unhashed - the tail is outside this chunk.
mov rcx,8*16
sub rbx,8*16
lea rsi,[128+rsi]
jmp NEAR $L$seal_sse_128_tail_hash
; Reached when more than 192 input bytes were supplied: finish the first
; 192 bytes by encrypting bytes 128..191 with block 0 (xmm0/4/8/12), then pick
; the code path for what is left.
$L$seal_sse_main_init:
movdqu xmm3,XMMWORD[((0 + 128))+rsi]
movdqu xmm7,XMMWORD[((16 + 128))+rsi]
movdqu xmm11,XMMWORD[((32 + 128))+rsi]
movdqu xmm15,XMMWORD[((48 + 128))+rsi]
pxor xmm0,xmm3
pxor xmm4,xmm7
pxor xmm8,xmm11
pxor xmm15,xmm12
movdqu XMMWORD[(0 + 128)+rdi],xmm0
movdqu XMMWORD[(16 + 128)+rdi],xmm4
movdqu XMMWORD[(32 + 128)+rdi],xmm8
movdqu XMMWORD[(48 + 128)+rdi],xmm15
; NOTE: this rcx value is dead - it is overwritten by "mov rcx,2" three
; instructions below before anything reads it.
mov rcx,12*16
; 192 bytes consumed from the input; rdi is left pointing at the start of the
; ciphertext because none of it has been hashed yet.
sub rbx,12*16
lea rsi,[192+rsi]
; Loop counters for the rounds-plus-hash loops that follow (rcx = 2, r8 = 8).
mov rcx,2
mov r8,8
; Dispatch on the remaining length (unsigned): <=64, <=128, <=192 bytes go to
; a tail routine; anything larger falls through into the main loop.
cmp rbx,4*16
jbe NEAR $L$seal_sse_tail_64
cmp rbx,8*16
jbe NEAR $L$seal_sse_tail_128
cmp rbx,12*16
jbe NEAR $L$seal_sse_tail_192
; Top of the 256-byte main loop: rebuild four fresh ChaCha20 input states.
; Row 0 = $L$chacha20_consts, rows 1/2 = the copies saved at rbp+160+48 / +64.
; Row 3 continues from the value saved at rbp+160+96:
;   xmm15 = saved+1, xmm14 = saved+2, xmm13 = saved+3, xmm12 = saved+4
; and the four values are written back to rbp+160+144/+128/+112/+96 for the
; add-back after the rounds (so +96 always holds the highest value used).
$L$seal_sse_main_loop:
movdqa xmm0,XMMWORD[$L$chacha20_consts]
movdqa xmm4,XMMWORD[((160+48))+rbp]
movdqa xmm8,XMMWORD[((160+64))+rbp]
movdqa xmm1,xmm0
movdqa xmm5,xmm4
movdqa xmm9,xmm8
movdqa xmm2,xmm0
movdqa xmm6,xmm4
movdqa xmm10,xmm8
movdqa xmm3,xmm0
movdqa xmm7,xmm4
movdqa xmm11,xmm8
movdqa xmm15,XMMWORD[((160+96))+rbp]
paddd xmm15,XMMWORD[$L$sse_inc]
movdqa xmm14,xmm15
paddd xmm14,XMMWORD[$L$sse_inc]
movdqa xmm13,xmm14
paddd xmm13,XMMWORD[$L$sse_inc]
movdqa xmm12,xmm13
paddd xmm12,XMMWORD[$L$sse_inc]
movdqa XMMWORD[(160+96)+rbp],xmm12
movdqa XMMWORD[(160+112)+rbp],xmm13
movdqa XMMWORD[(160+128)+rbp],xmm14
movdqa XMMWORD[(160+144)+rbp],xmm15
; align the hot loop entry that follows
ALIGN 32
; Main-loop body: one column round plus one diagonal round over four ChaCha20
; blocks (xmm0-xmm15), with one Poly1305 block update interleaved between the
; vector instructions. The hash input is ciphertext already written at rdi;
; rdi advances 16 bytes per hashed block.
; Loop control: repeat while --r8 >= 0; then hash one extra block and repeat
; while --rcx > 0.
; xmm8 doubles as scratch (shuffle mask / shift temporary); its state value is
; parked at rbp+160+80 whenever it is borrowed.
$L$seal_sse_main_rounds:
; --- column round: a += b; d ^= a; d <<<= 16 ---
movdqa XMMWORD[(160+80)+rbp],xmm8
movdqa xmm8,XMMWORD[$L$rol16]
paddd xmm3,xmm7
paddd xmm2,xmm6
paddd xmm1,xmm5
paddd xmm0,xmm4
pxor xmm15,xmm3
pxor xmm14,xmm2
pxor xmm13,xmm1
pxor xmm12,xmm0
; hand-encoded pshufb xmm15,xmm8 / xmm14,xmm8 / xmm13,xmm8 / xmm12,xmm8
DB 102,69,15,56,0,248
DB 102,69,15,56,0,240
DB 102,69,15,56,0,232
DB 102,69,15,56,0,224
; c += d; b ^= c
movdqa xmm8,XMMWORD[((160+80))+rbp]
paddd xmm11,xmm15
paddd xmm10,xmm14
paddd xmm9,xmm13
paddd xmm8,xmm12
pxor xmm7,xmm11
; [hash] h (r12:r11:r10) += 16 ciphertext bytes at rdi, plus the 2^128 bit
add r10,QWORD[((0+0))+rdi]
adc r11,QWORD[((8+0))+rdi]
adc r12,1
pxor xmm6,xmm10
pxor xmm5,xmm9
pxor xmm4,xmm8
; b <<<= 12
movdqa XMMWORD[(160+80)+rbp],xmm8
movdqa xmm8,xmm7
psrld xmm8,20
pslld xmm7,32-20
pxor xmm7,xmm8
movdqa xmm8,xmm6
psrld xmm8,20
pslld xmm6,32-20
pxor xmm6,xmm8
movdqa xmm8,xmm5
psrld xmm8,20
pslld xmm5,32-20
pxor xmm5,xmm8
movdqa xmm8,xmm4
psrld xmm8,20
pslld xmm4,32-20
pxor xmm4,xmm8
; [hash] h * r, part 1: low limb of r ([rbp+160]) times h
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
; a += b; d ^= a; d <<<= 8
movdqa xmm8,XMMWORD[$L$rol8]
paddd xmm3,xmm7
paddd xmm2,xmm6
paddd xmm1,xmm5
paddd xmm0,xmm4
pxor xmm15,xmm3
pxor xmm14,xmm2
pxor xmm13,xmm1
pxor xmm12,xmm0
DB 102,69,15,56,0,248
DB 102,69,15,56,0,240
DB 102,69,15,56,0,232
DB 102,69,15,56,0,224
; c += d; b ^= c
movdqa xmm8,XMMWORD[((160+80))+rbp]
paddd xmm11,xmm15
paddd xmm10,xmm14
paddd xmm9,xmm13
paddd xmm8,xmm12
pxor xmm7,xmm11
pxor xmm6,xmm10
; [hash] h * r, part 2: high limb of r ([rbp+168]) times h0 and h1
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
pxor xmm5,xmm9
pxor xmm4,xmm8
; b <<<= 7
movdqa XMMWORD[(160+80)+rbp],xmm8
movdqa xmm8,xmm7
psrld xmm8,25
pslld xmm7,32-25
pxor xmm7,xmm8
movdqa xmm8,xmm6
psrld xmm8,25
pslld xmm6,32-25
pxor xmm6,xmm8
movdqa xmm8,xmm5
psrld xmm8,25
pslld xmm5,32-25
pxor xmm5,xmm8
movdqa xmm8,xmm4
psrld xmm8,25
pslld xmm4,32-25
pxor xmm4,xmm8
movdqa xmm8,XMMWORD[((160+80))+rbp]
; [hash] h * r, part 3: fold in r1*h2; product now in r9:r15:r14:r13
imul r9,r12
add r15,r10
adc r9,rdx
; Rotate rows b/c/d of every block by 4/8/12 bytes (hand-encoded palignr).
DB 102,15,58,15,255,4
DB 102,69,15,58,15,219,8
DB 102,69,15,58,15,255,12
DB 102,15,58,15,246,4
DB 102,69,15,58,15,210,8
DB 102,69,15,58,15,246,12
DB 102,15,58,15,237,4
DB 102,69,15,58,15,201,8
DB 102,69,15,58,15,237,12
DB 102,15,58,15,228,4
DB 102,69,15,58,15,192,8
DB 102,69,15,58,15,228,12
; --- diagonal round ---
movdqa XMMWORD[(160+80)+rbp],xmm8
movdqa xmm8,XMMWORD[$L$rol16]
paddd xmm3,xmm7
paddd xmm2,xmm6
paddd xmm1,xmm5
paddd xmm0,xmm4
pxor xmm15,xmm3
pxor xmm14,xmm2
; [hash] partial reduction mod 2^130-5: low 130 bits plus 5*c, with 5*c
; computed as (r9:r15 & ~3) + (r9:r15 >> 2)
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
pxor xmm13,xmm1
pxor xmm12,xmm0
DB 102,69,15,56,0,248
DB 102,69,15,56,0,240
DB 102,69,15,56,0,232
DB 102,69,15,56,0,224
movdqa xmm8,XMMWORD[((160+80))+rbp]
paddd xmm11,xmm15
paddd xmm10,xmm14
paddd xmm9,xmm13
paddd xmm8,xmm12
pxor xmm7,xmm11
pxor xmm6,xmm10
pxor xmm5,xmm9
pxor xmm4,xmm8
movdqa XMMWORD[(160+80)+rbp],xmm8
movdqa xmm8,xmm7
psrld xmm8,20
pslld xmm7,32-20
pxor xmm7,xmm8
movdqa xmm8,xmm6
psrld xmm8,20
pslld xmm6,32-20
pxor xmm6,xmm8
movdqa xmm8,xmm5
psrld xmm8,20
pslld xmm5,32-20
pxor xmm5,xmm8
movdqa xmm8,xmm4
psrld xmm8,20
pslld xmm4,32-20
pxor xmm4,xmm8
movdqa xmm8,XMMWORD[$L$rol8]
paddd xmm3,xmm7
paddd xmm2,xmm6
paddd xmm1,xmm5
paddd xmm0,xmm4
pxor xmm15,xmm3
pxor xmm14,xmm2
pxor xmm13,xmm1
pxor xmm12,xmm0
DB 102,69,15,56,0,248
DB 102,69,15,56,0,240
DB 102,69,15,56,0,232
DB 102,69,15,56,0,224
movdqa xmm8,XMMWORD[((160+80))+rbp]
paddd xmm11,xmm15
paddd xmm10,xmm14
paddd xmm9,xmm13
paddd xmm8,xmm12
pxor xmm7,xmm11
pxor xmm6,xmm10
pxor xmm5,xmm9
pxor xmm4,xmm8
movdqa XMMWORD[(160+80)+rbp],xmm8
movdqa xmm8,xmm7
psrld xmm8,25
pslld xmm7,32-25
pxor xmm7,xmm8
movdqa xmm8,xmm6
psrld xmm8,25
pslld xmm6,32-25
pxor xmm6,xmm8
movdqa xmm8,xmm5
psrld xmm8,25
pslld xmm5,32-25
pxor xmm5,xmm8
movdqa xmm8,xmm4
psrld xmm8,25
pslld xmm4,32-25
pxor xmm4,xmm8
movdqa xmm8,XMMWORD[((160+80))+rbp]
; Rotate rows back to column position: palignr by 12 / 8 / 4 bytes.
DB 102,15,58,15,255,12
DB 102,69,15,58,15,219,8
DB 102,69,15,58,15,255,4
DB 102,15,58,15,246,12
DB 102,69,15,58,15,210,8
DB 102,69,15,58,15,246,4
DB 102,15,58,15,237,12
DB 102,69,15,58,15,201,8
DB 102,69,15,58,15,237,4
DB 102,15,58,15,228,12
DB 102,69,15,58,15,192,8
DB 102,69,15,58,15,228,4
; the interleaved hash consumed 16 bytes of ciphertext
lea rdi,[16+rdi]
dec r8
jge NEAR $L$seal_sse_main_rounds
; Extra, non-interleaved Poly1305 block update on the next 16 bytes at rdi.
add r10,QWORD[((0+0))+rdi]
adc r11,QWORD[((8+0))+rdi]
adc r12,1
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
imul r9,r12
add r15,r10
adc r9,rdx
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
lea rdi,[16+rdi]
dec rcx
jg NEAR $L$seal_sse_main_rounds
; Rounds done: add the saved input state back into all four blocks.
paddd xmm3,XMMWORD[$L$chacha20_consts]
paddd xmm7,XMMWORD[((160+48))+rbp]
paddd xmm11,XMMWORD[((160+64))+rbp]
paddd xmm15,XMMWORD[((160+144))+rbp]
paddd xmm2,XMMWORD[$L$chacha20_consts]
paddd xmm6,XMMWORD[((160+48))+rbp]
paddd xmm10,XMMWORD[((160+64))+rbp]
paddd xmm14,XMMWORD[((160+128))+rbp]
paddd xmm1,XMMWORD[$L$chacha20_consts]
paddd xmm5,XMMWORD[((160+48))+rbp]
paddd xmm9,XMMWORD[((160+64))+rbp]
paddd xmm13,XMMWORD[((160+112))+rbp]
paddd xmm0,XMMWORD[$L$chacha20_consts]
paddd xmm4,XMMWORD[((160+48))+rbp]
paddd xmm8,XMMWORD[((160+64))+rbp]
paddd xmm12,XMMWORD[((160+96))+rbp]
; Park xmm14 at rbp+160+80 so it can serve as the scratch register for the
; first 64 bytes. The store appears twice in the generated output; the second
; copy is redundant but harmless.
movdqa XMMWORD[(160+80)+rbp],xmm14
movdqa XMMWORD[(160+80)+rbp],xmm14
; Bytes 0..63: output = input XOR block 3 (xmm3/7/11/15).
movdqu xmm14,XMMWORD[((0 + 0))+rsi]
pxor xmm14,xmm3
movdqu XMMWORD[(0 + 0)+rdi],xmm14
movdqu xmm14,XMMWORD[((16 + 0))+rsi]
pxor xmm14,xmm7
movdqu XMMWORD[(16 + 0)+rdi],xmm14
movdqu xmm14,XMMWORD[((32 + 0))+rsi]
pxor xmm14,xmm11
movdqu XMMWORD[(32 + 0)+rdi],xmm14
movdqu xmm14,XMMWORD[((48 + 0))+rsi]
pxor xmm14,xmm15
movdqu XMMWORD[(48 + 0)+rdi],xmm14
movdqa xmm14,XMMWORD[((160+80))+rbp]
; Bytes 64..127: XOR with block 2 (xmm2/6/10/14).
movdqu xmm3,XMMWORD[((0 + 64))+rsi]
movdqu xmm7,XMMWORD[((16 + 64))+rsi]
movdqu xmm11,XMMWORD[((32 + 64))+rsi]
movdqu xmm15,XMMWORD[((48 + 64))+rsi]
pxor xmm2,xmm3
pxor xmm6,xmm7
pxor xmm10,xmm11
pxor xmm15,xmm14
movdqu XMMWORD[(0 + 64)+rdi],xmm2
movdqu XMMWORD[(16 + 64)+rdi],xmm6
movdqu XMMWORD[(32 + 64)+rdi],xmm10
movdqu XMMWORD[(48 + 64)+rdi],xmm15
; Bytes 128..191: XOR with block 1 (xmm1/5/9/13).
movdqu xmm3,XMMWORD[((0 + 128))+rsi]
movdqu xmm7,XMMWORD[((16 + 128))+rsi]
movdqu xmm11,XMMWORD[((32 + 128))+rsi]
movdqu xmm15,XMMWORD[((48 + 128))+rsi]
pxor xmm1,xmm3
pxor xmm5,xmm7
pxor xmm9,xmm11
pxor xmm15,xmm13
movdqu XMMWORD[(0 + 128)+rdi],xmm1
movdqu XMMWORD[(16 + 128)+rdi],xmm5
movdqu XMMWORD[(32 + 128)+rdi],xmm9
movdqu XMMWORD[(48 + 128)+rdi],xmm15
; More than 256 bytes left: the fourth 64-byte block is used in full.
cmp rbx,16*16
ja NEAR $L$seal_sse_main_loop_xor
; Otherwise stop after 192 bytes: rcx = 192, rbx -= 192, rsi += 192, and the
; tail code finishes with the keystream left in block 0.
mov rcx,12*16
sub rbx,12*16
lea rsi,[192+rsi]
jmp NEAR $L$seal_sse_128_tail_hash
; Finish a full 256-byte main-loop iteration: encrypt bytes 192..255 with
; block 0 (xmm0/4/8/12), advance, and decide how to continue.
$L$seal_sse_main_loop_xor:
movdqu xmm3,XMMWORD[((0 + 192))+rsi]
movdqu xmm7,XMMWORD[((16 + 192))+rsi]
movdqu xmm11,XMMWORD[((32 + 192))+rsi]
movdqu xmm15,XMMWORD[((48 + 192))+rsi]
pxor xmm0,xmm3
pxor xmm4,xmm7
pxor xmm8,xmm11
pxor xmm15,xmm12
movdqu XMMWORD[(0 + 192)+rdi],xmm0
movdqu XMMWORD[(16 + 192)+rdi],xmm4
movdqu XMMWORD[(32 + 192)+rdi],xmm8
movdqu XMMWORD[(48 + 192)+rdi],xmm15
; 256 input bytes consumed. rdi is not touched here; it is advanced by the
; hashing code as ciphertext is absorbed.
lea rsi,[256+rsi]
sub rbx,16*16
; Loop counters for the next rounds-plus-hash pass (rcx = 6, r8 = 4).
mov rcx,6
mov r8,4
; More than 192 bytes left: another full main-loop iteration.
; NOTE: jg is a signed comparison on a byte count; it matches the unsigned
; result for any length below 2^63.
cmp rbx,12*16
jg NEAR $L$seal_sse_main_loop
; Nothing left to encrypt: go hash what is pending, with rcx = 0.
mov rcx,rbx
test rbx,rbx
je NEAR $L$seal_sse_128_tail_hash
; 1..192 bytes left: pick the matching tail (rcx = 6 again for its loops);
; <= 64 bytes falls through into $L$seal_sse_tail_64.
mov rcx,6
cmp rbx,8*16
ja NEAR $L$seal_sse_tail_192
cmp rbx,4*16
ja NEAR $L$seal_sse_tail_128
; Tail for at most 64 remaining input bytes: generate one ChaCha20 block in
; xmm0/xmm4/xmm8/xmm12 while hashing ciphertext that was already written at
; rdi. rcx and r8 are loop counters set by the caller.
$L$seal_sse_tail_64:
; Fresh input state: constants, saved rows 1/2, and row 3 = the value saved at
; rbp+160+96 plus $L$sse_inc (written back to the same slot).
movdqa xmm0,XMMWORD[$L$chacha20_consts]
movdqa xmm4,XMMWORD[((160+48))+rbp]
movdqa xmm8,XMMWORD[((160+64))+rbp]
movdqa xmm12,XMMWORD[((160+96))+rbp]
paddd xmm12,XMMWORD[$L$sse_inc]
movdqa XMMWORD[(160+96)+rbp],xmm12
; Entry that hashes two blocks per double round: one here, one after the
; rounds below.
$L$seal_sse_tail_64_rounds_and_x2hash:
; h (r12:r11:r10) += 16 ciphertext bytes at rdi, plus the 2^128 padding bit
add r10,QWORD[((0+0))+rdi]
adc r11,QWORD[((8+0))+rdi]
adc r12,1
; h *= r, r read from [rbp+160] / [rbp+168]; product in r9:r15:r14:r13
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
imul r9,r12
add r15,r10
adc r9,rdx
; partial reduction mod 2^130-5: low 130 bits plus 5*c, with 5*c computed as
; (r9:r15 & ~3) + (r9:r15 >> 2)
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
lea rdi,[16+rdi]
; Entry that hashes one block per double round.
$L$seal_sse_tail_64_rounds_and_x1hash:
; Column quarter-round (a=xmm0 b=xmm4 c=xmm8 d=xmm12, xmm3 scratch):
; a+=b; d^=a; d<<<=16; c+=d; b^=c; b<<<=12;
; a+=b; d^=a; d<<<=8;  c+=d; b^=c; b<<<=7
paddd xmm0,xmm4
pxor xmm12,xmm0
pshufb xmm12,XMMWORD[$L$rol16]
paddd xmm8,xmm12
pxor xmm4,xmm8
movdqa xmm3,xmm4
pslld xmm3,12
psrld xmm4,20
pxor xmm4,xmm3
paddd xmm0,xmm4
pxor xmm12,xmm0
pshufb xmm12,XMMWORD[$L$rol8]
paddd xmm8,xmm12
pxor xmm4,xmm8
movdqa xmm3,xmm4
pslld xmm3,7
psrld xmm4,25
pxor xmm4,xmm3
; Rotate rows into diagonal position (hand-encoded):
; palignr xmm4,xmm4,4 / palignr xmm8,xmm8,8 / palignr xmm12,xmm12,12
DB 102,15,58,15,228,4
DB 102,69,15,58,15,192,8
DB 102,69,15,58,15,228,12
; Diagonal quarter-round.
paddd xmm0,xmm4
pxor xmm12,xmm0
pshufb xmm12,XMMWORD[$L$rol16]
paddd xmm8,xmm12
pxor xmm4,xmm8
movdqa xmm3,xmm4
pslld xmm3,12
psrld xmm4,20
pxor xmm4,xmm3
paddd xmm0,xmm4
pxor xmm12,xmm0
pshufb xmm12,XMMWORD[$L$rol8]
paddd xmm8,xmm12
pxor xmm4,xmm8
movdqa xmm3,xmm4
pslld xmm3,7
psrld xmm4,25
pxor xmm4,xmm3
; Rotate rows back: palignr xmm4,xmm4,12 / xmm8,xmm8,8 / xmm12,xmm12,4
DB 102,15,58,15,228,12
DB 102,69,15,58,15,192,8
DB 102,69,15,58,15,228,4
; Hash the next 16 ciphertext bytes at rdi (same update as above).
add r10,QWORD[((0+0))+rdi]
adc r11,QWORD[((8+0))+rdi]
adc r12,1
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
imul r9,r12
add r15,r10
adc r9,rdx
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
lea rdi,[16+rdi]
; While --rcx > 0 take the two-hash entry; afterwards, while --r8 >= 0 take
; the one-hash entry.
dec rcx
jg NEAR $L$seal_sse_tail_64_rounds_and_x2hash
dec r8
jge NEAR $L$seal_sse_tail_64_rounds_and_x1hash
; Add the input state back; the keystream block is now in xmm0/4/8/12 and is
; applied by the shared tail code.
paddd xmm0,XMMWORD[$L$chacha20_consts]
paddd xmm4,XMMWORD[((160+48))+rbp]
paddd xmm8,XMMWORD[((160+64))+rbp]
paddd xmm12,XMMWORD[((160+96))+rbp]
jmp NEAR $L$seal_sse_128_tail_xor
; Tail for 65..128 remaining input bytes: generate two ChaCha20 blocks
;   block 0: xmm0/xmm4/xmm8/xmm12    block 1: xmm1/xmm5/xmm9/xmm13
; while hashing ciphertext that was already written at rdi. rcx and r8 are
; loop counters set by the caller.
$L$seal_sse_tail_128:
; Fresh input states. Row 3 continues from the value saved at rbp+160+96:
; xmm13 = saved+1, xmm12 = saved+2; both are written back (+112 / +96).
movdqa xmm0,XMMWORD[$L$chacha20_consts]
movdqa xmm4,XMMWORD[((160+48))+rbp]
movdqa xmm8,XMMWORD[((160+64))+rbp]
movdqa xmm1,xmm0
movdqa xmm5,xmm4
movdqa xmm9,xmm8
movdqa xmm13,XMMWORD[((160+96))+rbp]
paddd xmm13,XMMWORD[$L$sse_inc]
movdqa xmm12,xmm13
paddd xmm12,XMMWORD[$L$sse_inc]
movdqa XMMWORD[(160+96)+rbp],xmm12
movdqa XMMWORD[(160+112)+rbp],xmm13
; Entry that hashes two blocks per double round: one here, one between the
; column and diagonal rounds below.
$L$seal_sse_tail_128_rounds_and_x2hash:
; h (r12:r11:r10) += 16 ciphertext bytes at rdi, plus the 2^128 padding bit
add r10,QWORD[((0+0))+rdi]
adc r11,QWORD[((8+0))+rdi]
adc r12,1
; h *= r, r read from [rbp+160] / [rbp+168]; product in r9:r15:r14:r13
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
imul r9,r12
add r15,r10
adc r9,rdx
; partial reduction mod 2^130-5: low 130 bits plus 5*c, with 5*c computed as
; (r9:r15 & ~3) + (r9:r15 >> 2)
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
lea rdi,[16+rdi]
; Entry that hashes one block per double round.
$L$seal_sse_tail_128_rounds_and_x1hash:
; Column quarter-round, block 0 (a=xmm0 b=xmm4 c=xmm8 d=xmm12, xmm3 scratch):
; a+=b; d^=a; d<<<=16; c+=d; b^=c; b<<<=12;
; a+=b; d^=a; d<<<=8;  c+=d; b^=c; b<<<=7
paddd xmm0,xmm4
pxor xmm12,xmm0
pshufb xmm12,XMMWORD[$L$rol16]
paddd xmm8,xmm12
pxor xmm4,xmm8
movdqa xmm3,xmm4
pslld xmm3,12
psrld xmm4,20
pxor xmm4,xmm3
paddd xmm0,xmm4
pxor xmm12,xmm0
pshufb xmm12,XMMWORD[$L$rol8]
paddd xmm8,xmm12
pxor xmm4,xmm8
movdqa xmm3,xmm4
pslld xmm3,7
psrld xmm4,25
pxor xmm4,xmm3
; hand-encoded palignr xmm4,xmm4,4 / xmm8,xmm8,8 / xmm12,xmm12,12
DB 102,15,58,15,228,4
DB 102,69,15,58,15,192,8
DB 102,69,15,58,15,228,12
; Column quarter-round, block 1 (xmm1/xmm5/xmm9/xmm13).
paddd xmm1,xmm5
pxor xmm13,xmm1
pshufb xmm13,XMMWORD[$L$rol16]
paddd xmm9,xmm13
pxor xmm5,xmm9
movdqa xmm3,xmm5
pslld xmm3,12
psrld xmm5,20
pxor xmm5,xmm3
paddd xmm1,xmm5
pxor xmm13,xmm1
pshufb xmm13,XMMWORD[$L$rol8]
paddd xmm9,xmm13
pxor xmm5,xmm9
movdqa xmm3,xmm5
pslld xmm3,7
psrld xmm5,25
pxor xmm5,xmm3
; hand-encoded palignr xmm5,xmm5,4 / xmm9,xmm9,8 / xmm13,xmm13,12
DB 102,15,58,15,237,4
DB 102,69,15,58,15,201,8
DB 102,69,15,58,15,237,12
; Hash the next 16 ciphertext bytes at rdi (same update as above); rdi is
; advanced after the diagonal round.
add r10,QWORD[((0+0))+rdi]
adc r11,QWORD[((8+0))+rdi]
adc r12,1
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
imul r9,r12
add r15,r10
adc r9,rdx
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
; Diagonal quarter-round, block 0.
paddd xmm0,xmm4
pxor xmm12,xmm0
pshufb xmm12,XMMWORD[$L$rol16]
paddd xmm8,xmm12
pxor xmm4,xmm8
movdqa xmm3,xmm4
pslld xmm3,12
psrld xmm4,20
pxor xmm4,xmm3
paddd xmm0,xmm4
pxor xmm12,xmm0
pshufb xmm12,XMMWORD[$L$rol8]
paddd xmm8,xmm12
pxor xmm4,xmm8
movdqa xmm3,xmm4
pslld xmm3,7
psrld xmm4,25
pxor xmm4,xmm3
; hand-encoded palignr xmm4,xmm4,12 / xmm8,xmm8,8 / xmm12,xmm12,4
DB 102,15,58,15,228,12
DB 102,69,15,58,15,192,8
DB 102,69,15,58,15,228,4
; Diagonal quarter-round, block 1.
paddd xmm1,xmm5
pxor xmm13,xmm1
pshufb xmm13,XMMWORD[$L$rol16]
paddd xmm9,xmm13
pxor xmm5,xmm9
movdqa xmm3,xmm5
pslld xmm3,12
psrld xmm5,20
pxor xmm5,xmm3
paddd xmm1,xmm5
pxor xmm13,xmm1
pshufb xmm13,XMMWORD[$L$rol8]
paddd xmm9,xmm13
pxor xmm5,xmm9
movdqa xmm3,xmm5
pslld xmm3,7
psrld xmm5,25
pxor xmm5,xmm3
; hand-encoded palignr xmm5,xmm5,12 / xmm9,xmm9,8 / xmm13,xmm13,4
DB 102,15,58,15,237,12
DB 102,69,15,58,15,201,8
DB 102,69,15,58,15,237,4
lea rdi,[16+rdi]
; While --rcx > 0 take the two-hash entry; afterwards, while --r8 >= 0 take
; the one-hash entry.
dec rcx
jg NEAR $L$seal_sse_tail_128_rounds_and_x2hash
dec r8
jge NEAR $L$seal_sse_tail_128_rounds_and_x1hash
; Add the input state back into both blocks.
paddd xmm1,XMMWORD[$L$chacha20_consts]
paddd xmm5,XMMWORD[((160+48))+rbp]
paddd xmm9,XMMWORD[((160+64))+rbp]
paddd xmm13,XMMWORD[((160+112))+rbp]
paddd xmm0,XMMWORD[$L$chacha20_consts]
paddd xmm4,XMMWORD[((160+48))+rbp]
paddd xmm8,XMMWORD[((160+64))+rbp]
paddd xmm12,XMMWORD[((160+96))+rbp]
; Encrypt the next 64 input bytes with block 1 (xmm1/5/9/13) and write them
; at rdi.
movdqu xmm3,XMMWORD[((0 + 0))+rsi]
movdqu xmm7,XMMWORD[((16 + 0))+rsi]
movdqu xmm11,XMMWORD[((32 + 0))+rsi]
movdqu xmm15,XMMWORD[((48 + 0))+rsi]
pxor xmm1,xmm3
pxor xmm5,xmm7
pxor xmm9,xmm11
pxor xmm15,xmm13
movdqu XMMWORD[(0 + 0)+rdi],xmm1
movdqu XMMWORD[(16 + 0)+rdi],xmm5
movdqu XMMWORD[(32 + 0)+rdi],xmm9
movdqu XMMWORD[(48 + 0)+rdi],xmm15
; 64 bytes done: rcx = 64, rbx -= 64, rsi += 64. Block 0 (xmm0/4/8/12) still
; holds unused keystream for the shared tail code.
; NOTE(review): rcx presumably tells the tail how many ciphertext bytes at
; rdi are still unhashed - the tail is outside this chunk.
mov rcx,4*16
sub rbx,4*16
lea rsi,[64+rsi]
jmp NEAR $L$seal_sse_128_tail_hash
$L$seal_sse_tail_192:
movdqa xmm0,XMMWORD[$L$chacha20_consts]
movdqa xmm4,XMMWORD[((160+48))+rbp]
movdqa xmm8,XMMWORD[((160+64))+rbp]
movdqa xmm1,xmm0
movdqa xmm5,xmm4
movdqa xmm9,xmm8
movdqa xmm2,xmm0
movdqa xmm6,xmm4
movdqa xmm10,xmm8
movdqa xmm14,XMMWORD[((160+96))+rbp]
paddd xmm14,XMMWORD[$L$sse_inc]
movdqa xmm13,xmm14
paddd xmm13,XMMWORD[$L$sse_inc]
movdqa xmm12,xmm13
paddd xmm12,XMMWORD[$L$sse_inc]
movdqa XMMWORD[(160+96)+rbp],xmm12
movdqa XMMWORD[(160+112)+rbp],xmm13
movdqa XMMWORD[(160+128)+rbp],xmm14
$L$seal_sse_tail_192_rounds_and_x2hash:
add r10,QWORD[((0+0))+rdi]
adc r11,QWORD[((8+0))+rdi]
adc r12,1
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
imul r9,r12
add r15,r10
adc r9,rdx
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
lea rdi,[16+rdi]
; Loop body of the 192-byte seal tail. Each pass runs two add/xor/rotate
; half-rounds over three parallel 4-register states (xmm0/4/8/12,
; xmm1/5/9/13, xmm2/6/10/14; xmm3 is scratch) and, between the two halves,
; absorbs one 16-byte block at [rdi] into the accumulator r10:r11:r12.
; After the loop the saved input state is added back, 128 bytes from rsi are
; XORed with states 2 and 1 and stored at rdi, and control falls through to
; $L$seal_sse_128_tail_hash with rcx = 128 (rdi is not advanced here).
$L$seal_sse_tail_192_rounds_and_x1hash:
; --- first half-round, state 0 ---
paddd xmm0,xmm4
pxor xmm12,xmm0
pshufb xmm12,XMMWORD[$L$rol16]
paddd xmm8,xmm12
pxor xmm4,xmm8
; rotate left by 12 using a shift pair
movdqa xmm3,xmm4
pslld xmm3,12
psrld xmm4,20
pxor xmm4,xmm3
paddd xmm0,xmm4
pxor xmm12,xmm0
pshufb xmm12,XMMWORD[$L$rol8]
paddd xmm8,xmm12
pxor xmm4,xmm8
; rotate left by 7 using a shift pair
movdqa xmm3,xmm4
pslld xmm3,7
psrld xmm4,25
pxor xmm4,xmm3
; Hand-encoded palignr: rotate xmm4 by 4 bytes, xmm8 by 8, xmm12 by 12.
DB 102,15,58,15,228,4
DB 102,69,15,58,15,192,8
DB 102,69,15,58,15,228,12
; --- first half-round, state 1 ---
paddd xmm1,xmm5
pxor xmm13,xmm1
pshufb xmm13,XMMWORD[$L$rol16]
paddd xmm9,xmm13
pxor xmm5,xmm9
movdqa xmm3,xmm5
pslld xmm3,12
psrld xmm5,20
pxor xmm5,xmm3
paddd xmm1,xmm5
pxor xmm13,xmm1
pshufb xmm13,XMMWORD[$L$rol8]
paddd xmm9,xmm13
pxor xmm5,xmm9
movdqa xmm3,xmm5
pslld xmm3,7
psrld xmm5,25
pxor xmm5,xmm3
; palignr xmm5 by 4, xmm9 by 8, xmm13 by 12.
DB 102,15,58,15,237,4
DB 102,69,15,58,15,201,8
DB 102,69,15,58,15,237,12
; --- first half-round, state 2 ---
paddd xmm2,xmm6
pxor xmm14,xmm2
pshufb xmm14,XMMWORD[$L$rol16]
paddd xmm10,xmm14
pxor xmm6,xmm10
movdqa xmm3,xmm6
pslld xmm3,12
psrld xmm6,20
pxor xmm6,xmm3
paddd xmm2,xmm6
pxor xmm14,xmm2
pshufb xmm14,XMMWORD[$L$rol8]
paddd xmm10,xmm14
pxor xmm6,xmm10
movdqa xmm3,xmm6
pslld xmm3,7
psrld xmm6,25
pxor xmm6,xmm3
; palignr xmm6 by 4, xmm10 by 8, xmm14 by 12.
DB 102,15,58,15,246,4
DB 102,69,15,58,15,210,8
DB 102,69,15,58,15,246,12
; --- hash step: acc += 16 bytes at [rdi] (1 added into the top limb) ---
add r10,QWORD[((0+0))+rdi]
adc r11,QWORD[((8+0))+rdi]
adc r12,1
; acc *= multiplier words at [rbp+160] / [rbp+168]; product in r13,r14,r15,r9.
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
imul r9,r12
add r15,r10
adc r9,rdx
; Reduce: keep low 130 bits, add back 5 * (bits above 2^130).
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
; --- second half-round, state 0 (byte rotations below are reversed: 12/8/4) ---
paddd xmm0,xmm4
pxor xmm12,xmm0
pshufb xmm12,XMMWORD[$L$rol16]
paddd xmm8,xmm12
pxor xmm4,xmm8
movdqa xmm3,xmm4
pslld xmm3,12
psrld xmm4,20
pxor xmm4,xmm3
paddd xmm0,xmm4
pxor xmm12,xmm0
pshufb xmm12,XMMWORD[$L$rol8]
paddd xmm8,xmm12
pxor xmm4,xmm8
movdqa xmm3,xmm4
pslld xmm3,7
psrld xmm4,25
pxor xmm4,xmm3
; palignr xmm4 by 12, xmm8 by 8, xmm12 by 4.
DB 102,15,58,15,228,12
DB 102,69,15,58,15,192,8
DB 102,69,15,58,15,228,4
; --- second half-round, state 1 ---
paddd xmm1,xmm5
pxor xmm13,xmm1
pshufb xmm13,XMMWORD[$L$rol16]
paddd xmm9,xmm13
pxor xmm5,xmm9
movdqa xmm3,xmm5
pslld xmm3,12
psrld xmm5,20
pxor xmm5,xmm3
paddd xmm1,xmm5
pxor xmm13,xmm1
pshufb xmm13,XMMWORD[$L$rol8]
paddd xmm9,xmm13
pxor xmm5,xmm9
movdqa xmm3,xmm5
pslld xmm3,7
psrld xmm5,25
pxor xmm5,xmm3
; palignr xmm5 by 12, xmm9 by 8, xmm13 by 4.
DB 102,15,58,15,237,12
DB 102,69,15,58,15,201,8
DB 102,69,15,58,15,237,4
; --- second half-round, state 2 ---
paddd xmm2,xmm6
pxor xmm14,xmm2
pshufb xmm14,XMMWORD[$L$rol16]
paddd xmm10,xmm14
pxor xmm6,xmm10
movdqa xmm3,xmm6
pslld xmm3,12
psrld xmm6,20
pxor xmm6,xmm3
paddd xmm2,xmm6
pxor xmm14,xmm2
pshufb xmm14,XMMWORD[$L$rol8]
paddd xmm10,xmm14
pxor xmm6,xmm10
movdqa xmm3,xmm6
pslld xmm3,7
psrld xmm6,25
pxor xmm6,xmm3
; palignr xmm6 by 12, xmm10 by 8, xmm14 by 4.
DB 102,15,58,15,246,12
DB 102,69,15,58,15,210,8
DB 102,69,15,58,15,246,4
; Step past the block absorbed in this pass.
lea rdi,[16+rdi]
; Loop control: while rcx stays positive, repeat with the extra hash step;
; afterwards keep looping from ..._x1hash while r8 stays non-negative.
dec rcx
jg NEAR $L$seal_sse_tail_192_rounds_and_x2hash
dec r8
jge NEAR $L$seal_sse_tail_192_rounds_and_x1hash
; Add the input state back: constants, the rows saved at [rbp+160+48] and
; [rbp+160+64], and the per-state fourth row saved at [rbp+160+128/112/96].
paddd xmm2,XMMWORD[$L$chacha20_consts]
paddd xmm6,XMMWORD[((160+48))+rbp]
paddd xmm10,XMMWORD[((160+64))+rbp]
paddd xmm14,XMMWORD[((160+128))+rbp]
paddd xmm1,XMMWORD[$L$chacha20_consts]
paddd xmm5,XMMWORD[((160+48))+rbp]
paddd xmm9,XMMWORD[((160+64))+rbp]
paddd xmm13,XMMWORD[((160+112))+rbp]
paddd xmm0,XMMWORD[$L$chacha20_consts]
paddd xmm4,XMMWORD[((160+48))+rbp]
paddd xmm8,XMMWORD[((160+64))+rbp]
paddd xmm12,XMMWORD[((160+96))+rbp]
; First 64 bytes: input at rsi XOR state 2, written to rdi. xmm15 carries the
; fourth lane so xmm14 itself is not modified.
movdqu xmm3,XMMWORD[((0 + 0))+rsi]
movdqu xmm7,XMMWORD[((16 + 0))+rsi]
movdqu xmm11,XMMWORD[((32 + 0))+rsi]
movdqu xmm15,XMMWORD[((48 + 0))+rsi]
pxor xmm2,xmm3
pxor xmm6,xmm7
pxor xmm10,xmm11
pxor xmm15,xmm14
movdqu XMMWORD[(0 + 0)+rdi],xmm2
movdqu XMMWORD[(16 + 0)+rdi],xmm6
movdqu XMMWORD[(32 + 0)+rdi],xmm10
movdqu XMMWORD[(48 + 0)+rdi],xmm15
; Next 64 bytes: input at rsi+64 XOR state 1, written to rdi+64.
movdqu xmm3,XMMWORD[((0 + 64))+rsi]
movdqu xmm7,XMMWORD[((16 + 64))+rsi]
movdqu xmm11,XMMWORD[((32 + 64))+rsi]
movdqu xmm15,XMMWORD[((48 + 64))+rsi]
pxor xmm1,xmm3
pxor xmm5,xmm7
pxor xmm9,xmm11
pxor xmm15,xmm13
movdqu XMMWORD[(0 + 64)+rdi],xmm1
movdqu XMMWORD[(16 + 64)+rdi],xmm5
movdqu XMMWORD[(32 + 64)+rdi],xmm9
movdqu XMMWORD[(48 + 64)+rdi],xmm15
; rcx = 128 bytes still to be hashed from rdi by the code that follows;
; 128 bytes of input consumed (rbx is the remaining length, rsi the source).
mov rcx,8*16
sub rbx,8*16
lea rsi,[128+rsi]
; Hash loop: while rcx is at least 16, absorb the 16 bytes at [rdi] into the
; accumulator r10:r11:r12, then advance rdi and subtract 16 from rcx.
; Exits to $L$seal_sse_128_tail_xor once fewer than 16 bytes remain in rcx.
$L$seal_sse_128_tail_hash:
cmp rcx,16
jb NEAR $L$seal_sse_128_tail_xor
; acc += 16 bytes at [rdi]; 1 (plus carry) is added into the top limb r12.
add r10,QWORD[((0+0))+rdi]
adc r11,QWORD[((8+0))+rdi]
adc r12,1
; acc *= multiplier words at [rbp+160] / [rbp+168]; product in r13,r14,r15,r9.
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
imul r9,r12
add r15,r10
adc r9,rdx
; Reduce: keep low 130 bits, add back 5 * (bits above 2^130).
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
sub rcx,16
lea rdi,[16+rdi]
jmp NEAR $L$seal_sse_128_tail_hash
; Processes the remaining input 16 bytes at a time while rbx (bytes left) is
; at least 16: XOR the block at rsi with xmm0, store it at rdi, absorb the
; stored block into the accumulator r10:r11:r12, then shift the register
; queue xmm4, xmm8, xmm12, xmm1, xmm5, xmm9, xmm13 down by one so the next
; 16 bytes of key material land in xmm0. Leaves to $L$seal_sse_tail_16 when
; fewer than 16 bytes remain.
$L$seal_sse_128_tail_xor:
cmp rbx,16
jb NEAR $L$seal_sse_tail_16
sub rbx,16
; out = in XOR xmm0
movdqu xmm3,XMMWORD[rsi]
pxor xmm0,xmm3
movdqu XMMWORD[rdi],xmm0
; acc += the 16 bytes just written (read back from [rdi]), 1 into top limb.
add r10,QWORD[rdi]
adc r11,QWORD[8+rdi]
adc r12,1
lea rsi,[16+rsi]
lea rdi,[16+rdi]
; acc *= multiplier words at [rbp+160] / [rbp+168]; product in r13,r14,r15,r9.
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
imul r9,r12
add r15,r10
adc r9,rdx
; Reduce: keep low 130 bits, add back 5 * (bits above 2^130).
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
; Advance the register queue by one 16-byte slot.
movdqa xmm0,xmm4
movdqa xmm4,xmm8
movdqa xmm8,xmm12
movdqa xmm12,xmm1
movdqa xmm1,xmm5
movdqa xmm5,xmm9
movdqa xmm9,xmm13
jmp NEAR $L$seal_sse_128_tail_xor
; Handles a final partial block of rbx bytes (fewer than 16 on the path from
; $L$seal_sse_128_tail_xor). With rbx == 0 it goes straight to
; $L$process_blocks_of_extra_in. Otherwise it gathers the rbx input bytes
; into xmm15, XORs them with xmm0, writes rbx bytes to rdi, and then decides
; whether bytes from the extra-input buffer should fill the rest of the block.
$L$seal_sse_tail_16:
test rbx,rbx
jz NEAR $L$process_blocks_of_extra_in
mov r8,rbx
mov rcx,rbx
; Start at the last input byte and walk backwards so that the first input
; byte ends up in byte lane 0 of xmm15.
lea rsi,[((-1))+rbx*1+rsi]
pxor xmm15,xmm15
$L$seal_sse_tail_16_compose:
pslldq xmm15,1
pinsrb xmm15,BYTE[rsi],0
lea rsi,[((-1))+rsi]
dec rcx
jne NEAR $L$seal_sse_tail_16_compose
; xmm15 = input bytes XOR xmm0; keep xmm15 for hashing, use xmm0 as the
; copy that is shifted out byte by byte.
pxor xmm15,xmm0
mov rcx,rbx
movdqu xmm0,xmm15
$L$seal_sse_tail_16_extract:
; Store one byte per iteration (XMMWORD is defined empty at the top of the file).
pextrb XMMWORD[rdi],xmm0,0
psrldq xmm0,1
add rdi,1
sub rcx,1
jnz NEAR $L$seal_sse_tail_16_extract
; r9 = pointer saved in the stack slot at [rsp+288+160+32];
; r13 = qword at [r9+48] (used below as a buffer address),
; r14 = qword at [r9+56] (used below as its byte count).
mov r9,QWORD[((288 + 160 + 32))+rsp]
mov r14,QWORD[56+r9]
mov r13,QWORD[48+r9]
test r14,r14
jz NEAR $L$process_partial_block
; r15 = 16 - rbx = free bytes left in this block; if the buffer holds fewer
; bytes than that (signed compare), take only r14 bytes.
mov r15,16
sub r15,rbx
cmp r14,r15
jge NEAR $L$load_extra_in
mov r15,r14
; Fills the partial block in xmm15 (rbx valid bytes) with r15 bytes taken from
; the buffer at r13, records the consumption in the structure at r9
; ([r9+48] += r15, [r9+56] -= r15), and absorbs the combined block into the
; accumulator r10:r11:r12. Falls through to $L$process_blocks_of_extra_in.
$L$load_extra_in:
; rsi = last byte to take; the bytes are loaded back to front.
lea rsi,[((-1))+r15*1+r13]
add r13,r15
sub r14,r15
mov QWORD[48+r9],r13
mov QWORD[56+r9],r14
add r8,r15
pxor xmm11,xmm11
$L$load_extra_load_loop:
pslldq xmm11,1
pinsrb xmm11,BYTE[rsi],0
lea rsi,[((-1))+rsi]
sub r15,1
jnz NEAR $L$load_extra_load_loop
; Shift the gathered bytes up by rbx byte lanes so they sit directly after
; the rbx bytes already held in xmm15.
mov r15,rbx
$L$load_extra_shift_loop:
pslldq xmm11,1
sub r15,1
jnz NEAR $L$load_extra_shift_loop
; Keep only the low rbx bytes of xmm15: the mask is the 16-byte
; $L$and_masks entry at offset (rbx - 1) * 16.
lea r15,[$L$and_masks]
shl rbx,4
pand xmm15,XMMWORD[((-16))+rbx*1+r15]
por xmm15,xmm11
; Hand-encoded movq r13,xmm15 (low qword); pextrq fetches the high qword.
DB 102,77,15,126,253
pextrq r14,xmm15,1
; acc += combined block, 1 (plus carry) into the top limb.
add r10,r13
adc r11,r14
adc r12,1
; acc *= multiplier words at [rbp+160] / [rbp+168]; product in r13,r14,r15,r9.
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
imul r9,r12
add r15,r10
adc r9,rdx
; Reduce: keep low 130 bits, add back 5 * (bits above 2^130).
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
; Absorbs every whole 16-byte block of the buffer described by the structure
; at r9 (address at [r9+48], byte count at [r9+56]) into the accumulator
; r10:r11:r12. rcx keeps the full byte count for the trailer code.
$L$process_blocks_of_extra_in:
; r9 was clobbered by earlier multiply steps, so reload it from the stack.
mov r9,QWORD[((288+32+160 ))+rsp]
mov rsi,QWORD[48+r9]
mov r8,QWORD[56+r9]
mov rcx,r8
; r8 = number of whole blocks; this shr also sets ZF for the first jz below.
shr r8,4
$L$process_extra_hash_loop:
; ZF comes from `shr r8,4` on entry and from `sub r8,1` on later passes
; (the jmp at the bottom does not change flags).
jz NEAR process_extra_in_trailer
; acc += 16 bytes at [rsi], 1 (plus carry) into the top limb.
add r10,QWORD[((0+0))+rsi]
adc r11,QWORD[((8+0))+rsi]
adc r12,1
; acc *= multiplier words at [rbp+160] / [rbp+168]; product in r13,r14,r15,r9.
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
imul r9,r12
add r15,r10
adc r9,rdx
; Reduce: keep low 130 bits, add back 5 * (bits above 2^130).
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
lea rsi,[16+rsi]
sub r8,1
jmp NEAR $L$process_extra_hash_loop
; Handles the bytes left over after the whole-block loop: rcx & 15 bytes at
; rsi. With none left it jumps to $L$do_length_block; otherwise it shifts
; those bytes into the low lanes of xmm15 (last byte first) and falls
; through to $L$process_partial_block with rbx = leftover count.
; xmm15 is not cleared here; the code at $L$process_partial_block masks it
; down to rbx bytes with an $L$and_masks entry.
process_extra_in_trailer:
and rcx,15
; mov leaves the flags from `and` intact for the jz below.
mov rbx,rcx
jz NEAR $L$do_length_block
lea rsi,[((-1))+rcx*1+rsi]
$L$process_extra_in_trailer_load:
pslldq xmm15,1
pinsrb xmm15,BYTE[rsi],0
lea rsi,[((-1))+rsi]
sub rcx,1
jnz NEAR $L$process_extra_in_trailer_load
; Absorbs a partial block: the low rbx bytes of xmm15 (the rest is masked to
; zero) are added to the accumulator r10:r11:r12 together with 1 in the top
; limb, followed by the usual multiply and reduce. Falls through to
; $L$do_length_block.
$L$process_partial_block:
; Mask = 16-byte $L$and_masks entry at offset (rbx - 1) * 16.
lea r15,[$L$and_masks]
shl rbx,4
pand xmm15,XMMWORD[((-16))+rbx*1+r15]
; Hand-encoded movq r13,xmm15 (low qword); pextrq fetches the high qword.
DB 102,77,15,126,253
pextrq r14,xmm15,1
add r10,r13
adc r11,r14
adc r12,1
; acc *= multiplier words at [rbp+160] / [rbp+168]; product in r13,r14,r15,r9.
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
imul r9,r12
add r15,r10
adc r9,rdx
; Reduce: keep low 130 bits, add back 5 * (bits above 2^130).
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
; Final hash step and function exit. Absorbs the 16 bytes stored at
; [rbp+160+32], fully reduces the accumulator modulo 2^130 - 5, adds the
; 128-bit value stored at [rbp+160+16], writes the 16-byte result through the
; pointer popped from the stack, restores saved registers and returns.
$L$do_length_block:
; acc += 16 bytes at [rbp+160+32], 1 (plus carry) into the top limb.
add r10,QWORD[((0+160+32))+rbp]
adc r11,QWORD[((8+160+32))+rbp]
adc r12,1
; acc *= multiplier words at [rbp+160] / [rbp+168]; product in r13,r14,r15,r9.
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
imul r9,r12
add r15,r10
adc r9,rdx
; Reduce: keep low 130 bits, add back 5 * (bits above 2^130).
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
; Final conditional subtraction: the three-limb constant (-5, -1, 3) equals
; 2^130 - 5. If the subtraction borrows (acc was smaller), cmovc restores
; the saved copy, so no branch depends on the accumulator value.
mov r13,r10
mov r14,r11
mov r15,r12
sub r10,-5
sbb r11,-1
sbb r12,3
cmovc r10,r13
cmovc r11,r14
cmovc r12,r15
; Add the 128-bit value at [rbp+160+16]; only the low 128 bits are kept.
add r10,QWORD[((0+160+16))+rbp]
adc r11,QWORD[((8+160+16))+rbp]
; Restore xmm6-xmm15 from the save area at [rbp+0 .. rbp+159]
; (these are callee-saved in the Microsoft x64 convention).
movaps xmm6,XMMWORD[((0+0))+rbp]
movaps xmm7,XMMWORD[((16+0))+rbp]
movaps xmm8,XMMWORD[((32+0))+rbp]
movaps xmm9,XMMWORD[((48+0))+rbp]
movaps xmm10,XMMWORD[((64+0))+rbp]
movaps xmm11,XMMWORD[((80+0))+rbp]
movaps xmm12,XMMWORD[((96+0))+rbp]
movaps xmm13,XMMWORD[((112+0))+rbp]
movaps xmm14,XMMWORD[((128+0))+rbp]
movaps xmm15,XMMWORD[((144+0))+rbp]
; Release the local frame; the next stack slot holds the pointer that
; receives the 16-byte result.
add rsp,288 + 160 + 32
pop r9
mov QWORD[r9],r10
mov QWORD[8+r9],r11
pop r15
pop r14
pop r13
pop r12
pop rbx
pop rbp
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
ret
; Set-up for the short-input seal path: builds three parallel 4-row states
; from the constants and the 48 bytes at [r9].
;   rows 0: xmm0, xmm1, xmm2   = $L$chacha20_consts
;   rows 1: xmm4, xmm5, xmm6   = 16 bytes at [r9]
;   rows 2: xmm8, xmm9, xmm10  = 16 bytes at [r9+16]
;   rows 3: xmm14 = 16 bytes at [r9+32]; xmm12 = that plus $L$sse_inc;
;           xmm13 = that plus twice $L$sse_inc
; Copies of rows 1, 2 and of xmm12 are kept in xmm7, xmm11, xmm15 for the
; add-back after the rounds. r10 is the round-loop counter (10 passes).
$L$seal_sse_128:
movdqu xmm0,XMMWORD[$L$chacha20_consts]
movdqa xmm1,xmm0
movdqa xmm2,xmm0
movdqu xmm4,XMMWORD[r9]
movdqa xmm5,xmm4
movdqa xmm6,xmm4
movdqu xmm8,XMMWORD[16+r9]
movdqa xmm9,xmm8
movdqa xmm10,xmm8
movdqu xmm14,XMMWORD[32+r9]
movdqa xmm12,xmm14
paddd xmm12,XMMWORD[$L$sse_inc]
movdqa xmm13,xmm12
paddd xmm13,XMMWORD[$L$sse_inc]
movdqa xmm7,xmm4
movdqa xmm11,xmm8
movdqa xmm15,xmm12
mov r10,10
; Round loop of the short-input seal path: r10 passes (set to 10 by the code
; above), each running two add/xor/rotate half-rounds over the three states
; xmm0/4/8/12, xmm1/5/9/13 and xmm2/6/10/14 (xmm3 is scratch). Afterwards the
; saved inputs are added back, state 2 supplies the hash key material
; (xmm2 masked with $L$clamp goes to [rbp+160], xmm6 goes to [rbp+160+16]),
; poly_hash_ad_internal is called, and control jumps to
; $L$seal_sse_128_tail_xor.
$L$seal_sse_128_rounds:
; --- first half-round, state 0 ---
paddd xmm0,xmm4
pxor xmm12,xmm0
pshufb xmm12,XMMWORD[$L$rol16]
paddd xmm8,xmm12
pxor xmm4,xmm8
movdqa xmm3,xmm4
pslld xmm3,12
psrld xmm4,20
pxor xmm4,xmm3
paddd xmm0,xmm4
pxor xmm12,xmm0
pshufb xmm12,XMMWORD[$L$rol8]
paddd xmm8,xmm12
pxor xmm4,xmm8
movdqa xmm3,xmm4
pslld xmm3,7
psrld xmm4,25
pxor xmm4,xmm3
; Hand-encoded palignr: xmm4 by 4 bytes, xmm8 by 8, xmm12 by 12.
DB 102,15,58,15,228,4
DB 102,69,15,58,15,192,8
DB 102,69,15,58,15,228,12
; --- first half-round, state 1 ---
paddd xmm1,xmm5
pxor xmm13,xmm1
pshufb xmm13,XMMWORD[$L$rol16]
paddd xmm9,xmm13
pxor xmm5,xmm9
movdqa xmm3,xmm5
pslld xmm3,12
psrld xmm5,20
pxor xmm5,xmm3
paddd xmm1,xmm5
pxor xmm13,xmm1
pshufb xmm13,XMMWORD[$L$rol8]
paddd xmm9,xmm13
pxor xmm5,xmm9
movdqa xmm3,xmm5
pslld xmm3,7
psrld xmm5,25
pxor xmm5,xmm3
; palignr xmm5 by 4, xmm9 by 8, xmm13 by 12.
DB 102,15,58,15,237,4
DB 102,69,15,58,15,201,8
DB 102,69,15,58,15,237,12
; --- first half-round, state 2 ---
paddd xmm2,xmm6
pxor xmm14,xmm2
pshufb xmm14,XMMWORD[$L$rol16]
paddd xmm10,xmm14
pxor xmm6,xmm10
movdqa xmm3,xmm6
pslld xmm3,12
psrld xmm6,20
pxor xmm6,xmm3
paddd xmm2,xmm6
pxor xmm14,xmm2
pshufb xmm14,XMMWORD[$L$rol8]
paddd xmm10,xmm14
pxor xmm6,xmm10
movdqa xmm3,xmm6
pslld xmm3,7
psrld xmm6,25
pxor xmm6,xmm3
; palignr xmm6 by 4, xmm10 by 8, xmm14 by 12.
DB 102,15,58,15,246,4
DB 102,69,15,58,15,210,8
DB 102,69,15,58,15,246,12
; --- second half-round, state 0 (byte rotations reversed: 12/8/4) ---
paddd xmm0,xmm4
pxor xmm12,xmm0
pshufb xmm12,XMMWORD[$L$rol16]
paddd xmm8,xmm12
pxor xmm4,xmm8
movdqa xmm3,xmm4
pslld xmm3,12
psrld xmm4,20
pxor xmm4,xmm3
paddd xmm0,xmm4
pxor xmm12,xmm0
pshufb xmm12,XMMWORD[$L$rol8]
paddd xmm8,xmm12
pxor xmm4,xmm8
movdqa xmm3,xmm4
pslld xmm3,7
psrld xmm4,25
pxor xmm4,xmm3
; palignr xmm4 by 12, xmm8 by 8, xmm12 by 4.
DB 102,15,58,15,228,12
DB 102,69,15,58,15,192,8
DB 102,69,15,58,15,228,4
; --- second half-round, state 1 ---
paddd xmm1,xmm5
pxor xmm13,xmm1
pshufb xmm13,XMMWORD[$L$rol16]
paddd xmm9,xmm13
pxor xmm5,xmm9
movdqa xmm3,xmm5
pslld xmm3,12
psrld xmm5,20
pxor xmm5,xmm3
paddd xmm1,xmm5
pxor xmm13,xmm1
pshufb xmm13,XMMWORD[$L$rol8]
paddd xmm9,xmm13
pxor xmm5,xmm9
movdqa xmm3,xmm5
pslld xmm3,7
psrld xmm5,25
pxor xmm5,xmm3
; palignr xmm5 by 12, xmm9 by 8, xmm13 by 4.
DB 102,15,58,15,237,12
DB 102,69,15,58,15,201,8
DB 102,69,15,58,15,237,4
; --- second half-round, state 2 ---
paddd xmm2,xmm6
pxor xmm14,xmm2
pshufb xmm14,XMMWORD[$L$rol16]
paddd xmm10,xmm14
pxor xmm6,xmm10
movdqa xmm3,xmm6
pslld xmm3,12
psrld xmm6,20
pxor xmm6,xmm3
paddd xmm2,xmm6
pxor xmm14,xmm2
pshufb xmm14,XMMWORD[$L$rol8]
paddd xmm10,xmm14
pxor xmm6,xmm10
movdqa xmm3,xmm6
pslld xmm3,7
psrld xmm6,25
pxor xmm6,xmm3
; palignr xmm6 by 12, xmm10 by 8, xmm14 by 4.
DB 102,15,58,15,246,12
DB 102,69,15,58,15,210,8
DB 102,69,15,58,15,246,4
dec r10
jnz NEAR $L$seal_sse_128_rounds
; Add the inputs back. Rows 2 and 3 of state 2 (xmm10, xmm14) are not
; restored; only xmm2 and xmm6 of that state are used below.
paddd xmm0,XMMWORD[$L$chacha20_consts]
paddd xmm1,XMMWORD[$L$chacha20_consts]
paddd xmm2,XMMWORD[$L$chacha20_consts]
paddd xmm4,xmm7
paddd xmm5,xmm7
paddd xmm6,xmm7
paddd xmm8,xmm11
paddd xmm9,xmm11
; xmm15 holds the saved fourth row of state 0; it is bumped by $L$sse_inc
; before being added to state 1.
paddd xmm12,xmm15
paddd xmm15,XMMWORD[$L$sse_inc]
paddd xmm13,xmm15
; Hash key material: masked multiplier at [rbp+160], addend at [rbp+160+16].
pand xmm2,XMMWORD[$L$clamp]
movdqa XMMWORD[(160+0)+rbp],xmm2
movdqa XMMWORD[(160+16)+rbp],xmm6
; Register-to-itself move emitted by the generator; r8 is unchanged.
mov r8,r8
call poly_hash_ad_internal
jmp NEAR $L$seal_sse_128_tail_xor
$L$SEH_end_chacha20_poly1305_seal:
; Entry of the AVX2 open path. Builds one two-lane state (each 128-bit row
; at [r9], [r9+16], [r9+32] is broadcast to both ymm lanes) and dispatches on
; the length in rbx:
;   rbx at most 192 -> $L$open_avx2_192
;   rbx at most 320 -> $L$open_avx2_320
;   otherwise       -> save rows 1-3 in the frame and run the init rounds.
ALIGN 64
chacha20_poly1305_open_avx2:
vzeroupper
vmovdqa ymm0,YMMWORD[$L$chacha20_consts]
vbroadcasti128 ymm4,XMMWORD[r9]
vbroadcasti128 ymm8,XMMWORD[16+r9]
vbroadcasti128 ymm12,XMMWORD[32+r9]
; The 32-byte load at $L$avx2_init spans that label's four zero dwords and
; the following $L$sse_inc (1,0,0,0): low lane += 0, high lane += 1.
vpaddd ymm12,ymm12,YMMWORD[$L$avx2_init]
cmp rbx,6*32
jbe NEAR $L$open_avx2_192
cmp rbx,10*32
jbe NEAR $L$open_avx2_320
; Save rows 1, 2 and 3 of the input state for later add-backs.
vmovdqa YMMWORD[(160+64)+rbp],ymm4
vmovdqa YMMWORD[(160+96)+rbp],ymm8
vmovdqa YMMWORD[(160+160)+rbp],ymm12
; r10 = round-loop counter for the init rounds that follow.
mov r10,10
; Initial rounds of the AVX2 open path: r10 passes (10 on entry) of two
; add/xor/rotate half-rounds on the two-lane state ymm0/4/8/12 (ymm3 is
; scratch). Afterwards the input state is added back, the low lanes of rows
; 0 and 1 are masked with $L$clamp and stored at [rbp+160] as hash key
; material, the high lanes are regrouped into ymm0 and ymm4 for the first 64
; bytes of data, and poly_hash_ad_internal is called.
$L$open_avx2_init_rounds:
; --- first half-round ---
vpaddd ymm0,ymm0,ymm4
vpxor ymm12,ymm12,ymm0
vpshufb ymm12,ymm12,YMMWORD[$L$rol16]
vpaddd ymm8,ymm8,ymm12
vpxor ymm4,ymm4,ymm8
vpsrld ymm3,ymm4,20
vpslld ymm4,ymm4,12
vpxor ymm4,ymm4,ymm3
vpaddd ymm0,ymm0,ymm4
vpxor ymm12,ymm12,ymm0
vpshufb ymm12,ymm12,YMMWORD[$L$rol8]
vpaddd ymm8,ymm8,ymm12
vpxor ymm4,ymm4,ymm8
vpslld ymm3,ymm4,7
vpsrld ymm4,ymm4,25
vpxor ymm4,ymm4,ymm3
; Per-lane byte rotation of rows 3, 2, 1 by 12, 8, 4.
vpalignr ymm12,ymm12,ymm12,12
vpalignr ymm8,ymm8,ymm8,8
vpalignr ymm4,ymm4,ymm4,4
; --- second half-round ---
vpaddd ymm0,ymm0,ymm4
vpxor ymm12,ymm12,ymm0
vpshufb ymm12,ymm12,YMMWORD[$L$rol16]
vpaddd ymm8,ymm8,ymm12
vpxor ymm4,ymm4,ymm8
vpsrld ymm3,ymm4,20
vpslld ymm4,ymm4,12
vpxor ymm4,ymm4,ymm3
vpaddd ymm0,ymm0,ymm4
vpxor ymm12,ymm12,ymm0
vpshufb ymm12,ymm12,YMMWORD[$L$rol8]
vpaddd ymm8,ymm8,ymm12
vpxor ymm4,ymm4,ymm8
vpslld ymm3,ymm4,7
vpsrld ymm4,ymm4,25
vpxor ymm4,ymm4,ymm3
; Per-lane byte rotation of rows 3, 2, 1 by 4, 8, 12 (undoes the above).
vpalignr ymm12,ymm12,ymm12,4
vpalignr ymm8,ymm8,ymm8,8
vpalignr ymm4,ymm4,ymm4,12
dec r10
jne NEAR $L$open_avx2_init_rounds
; Add the input state back.
vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts]
vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp]
vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp]
vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp]
; ymm3 = { ymm0.low, ymm4.low }, masked with $L$clamp, stored at [rbp+160].
vperm2i128 ymm3,ymm4,ymm0,0x02
vpand ymm3,ymm3,YMMWORD[$L$clamp]
vmovdqa YMMWORD[(160+0)+rbp],ymm3
; ymm0 = { ymm0.high, ymm4.high }, ymm4 = { ymm8.high, ymm12.high }.
vperm2i128 ymm0,ymm4,ymm0,0x13
vperm2i128 ymm4,ymm12,ymm8,0x13
; Register-to-itself move emitted by the generator; r8 is unchanged.
mov r8,r8
call poly_hash_ad_internal
; rcx = byte offset for the hash loop that follows.
xor rcx,rcx
; Absorbs the first 64 input bytes at rsi (four 16-byte blocks, rcx = 0, 16,
; 32, 48) into the accumulator r10:r11:r12, then XORs those 64 bytes with
; ymm0 and ymm4, stores the result at rdi, advances both pointers by 64 and
; subtracts 64 from the remaining length in rbx.
$L$open_avx2_init_hash:
; acc += 16 bytes at [rsi+rcx], 1 (plus carry) into the top limb.
add r10,QWORD[((0+0))+rcx*1+rsi]
adc r11,QWORD[((8+0))+rcx*1+rsi]
adc r12,1
; acc *= multiplier words at [rbp+160] / [rbp+168]; product in r13,r14,r15,r9.
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
imul r9,r12
add r15,r10
adc r9,rdx
; Reduce: keep low 130 bits, add back 5 * (bits above 2^130).
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
add rcx,16
cmp rcx,2*32
jne NEAR $L$open_avx2_init_hash
; The input is hashed before it is XORed (hash over the bytes read from rsi).
vpxor ymm0,ymm0,YMMWORD[rsi]
vpxor ymm4,ymm4,YMMWORD[32+rsi]
vmovdqu YMMWORD[rdi],ymm0
vmovdqu YMMWORD[32+rdi],ymm4
lea rsi,[64+rsi]
lea rdi,[64+rdi]
sub rbx,2*32
; Head of the 512-bytes-per-pass main loop. Leaves to
; $L$open_avx2_main_loop_done when fewer than 512 bytes remain in rbx.
; Otherwise sets up four two-lane states:
;   rows 0: ymm0..ymm3   = $L$chacha20_consts
;   rows 1: ymm4..ymm7   = row saved at [rbp+160+64]
;   rows 2: ymm8..ymm11  = row saved at [rbp+160+96]
;   rows 3: ymm15, ymm14, ymm13, ymm12 = saved row at [rbp+160+160] plus
;           1x, 2x, 3x, 4x $L$avx2_inc respectively
; and stores the four fourth rows at [rbp+160+256/224/192/160]; the last
; store also becomes the saved row for the next pass.
$L$open_avx2_main_loop:
cmp rbx,16*32
jb NEAR $L$open_avx2_main_loop_done
vmovdqa ymm0,YMMWORD[$L$chacha20_consts]
vmovdqa ymm4,YMMWORD[((160+64))+rbp]
vmovdqa ymm8,YMMWORD[((160+96))+rbp]
vmovdqa ymm1,ymm0
vmovdqa ymm5,ymm4
vmovdqa ymm9,ymm8
vmovdqa ymm2,ymm0
vmovdqa ymm6,ymm4
vmovdqa ymm10,ymm8
vmovdqa ymm3,ymm0
vmovdqa ymm7,ymm4
vmovdqa ymm11,ymm8
vmovdqa ymm12,YMMWORD[$L$avx2_inc]
vpaddd ymm15,ymm12,YMMWORD[((160+160))+rbp]
vpaddd ymm14,ymm12,ymm15
vpaddd ymm13,ymm12,ymm14
vpaddd ymm12,ymm12,ymm13
vmovdqa YMMWORD[(160+256)+rbp],ymm15
vmovdqa YMMWORD[(160+224)+rbp],ymm14
vmovdqa YMMWORD[(160+192)+rbp],ymm13
vmovdqa YMMWORD[(160+160)+rbp],ymm12
; rcx = byte offset into the input for the interleaved hash steps.
xor rcx,rcx
; Main-loop body of the AVX2 open path. Each pass runs two half-rounds on
; four two-lane states (ymm0-3 / ymm4-7 / ymm8-11 / ymm12-15 by row) with
; three hash steps interleaved (blocks at rsi+rcx, +16, +32; rcx advances by
; 48 until it reaches 480). All sixteen ymm registers hold state, so ymm8 is
; repeatedly spilled to [rbp+160+128] to serve as the shuffle mask or shift
; scratch. After the rounds the input state is added back, two more blocks
; (rsi+480, rsi+496) are hashed, 512 bytes are XORed from rsi to rdi, and the
; loop restarts at $L$open_avx2_main_loop.
$L$open_avx2_main_loop_rounds:
; Hash step 1 of 3: acc(r10:r11:r12) += 16 bytes at [rsi+rcx].
add r10,QWORD[((0+0))+rcx*1+rsi]
adc r11,QWORD[((8+0))+rcx*1+rsi]
adc r12,1
; Spill ymm8; it now holds the $L$rol16 shuffle mask.
vmovdqa YMMWORD[(160+128)+rbp],ymm8
vmovdqa ymm8,YMMWORD[$L$rol16]
vpaddd ymm3,ymm3,ymm7
vpaddd ymm2,ymm2,ymm6
vpaddd ymm1,ymm1,ymm5
vpaddd ymm0,ymm0,ymm4
vpxor ymm15,ymm15,ymm3
vpxor ymm14,ymm14,ymm2
vpxor ymm13,ymm13,ymm1
vpxor ymm12,ymm12,ymm0
; acc *= multiplier word at [rbp+160] (mulx leaves the flags untouched).
mov rdx,QWORD[((0+160+0))+rbp]
mov r15,rdx
mulx r14,r13,r10
mulx rdx,rax,r11
imul r15,r12
add r14,rax
adc r15,rdx
vpshufb ymm15,ymm15,ymm8
vpshufb ymm14,ymm14,ymm8
vpshufb ymm13,ymm13,ymm8
vpshufb ymm12,ymm12,ymm8
vpaddd ymm11,ymm11,ymm15
vpaddd ymm10,ymm10,ymm14
vpaddd ymm9,ymm9,ymm13
; Row 2 of state 0 is reloaded from the spill slot as part of the add.
vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp]
vpxor ymm7,ymm7,ymm11
; ... times multiplier word at [rbp+168].
mov rdx,QWORD[((8+160+0))+rbp]
mulx rax,r10,r10
add r14,r10
mulx r9,r11,r11
adc r15,r11
adc r9,0
imul rdx,r12
vpxor ymm6,ymm6,ymm10
vpxor ymm5,ymm5,ymm9
vpxor ymm4,ymm4,ymm8
; Spill ymm8 again; it is the shift scratch for the rotate-by-12 below.
vmovdqa YMMWORD[(160+128)+rbp],ymm8
vpsrld ymm8,ymm7,20
vpslld ymm7,ymm7,32-20
vpxor ymm7,ymm7,ymm8
vpsrld ymm8,ymm6,20
vpslld ymm6,ymm6,32-20
vpxor ymm6,ymm6,ymm8
vpsrld ymm8,ymm5,20
vpslld ymm5,ymm5,32-20
add r15,rax
adc r9,rdx
vpxor ymm5,ymm5,ymm8
vpsrld ymm8,ymm4,20
vpslld ymm4,ymm4,32-20
vpxor ymm4,ymm4,ymm8
vmovdqa ymm8,YMMWORD[$L$rol8]
vpaddd ymm3,ymm3,ymm7
vpaddd ymm2,ymm2,ymm6
vpaddd ymm1,ymm1,ymm5
vpaddd ymm0,ymm0,ymm4
vpxor ymm15,ymm15,ymm3
; Reduce: keep low 130 bits, add back 5 * (bits above 2^130).
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
vpxor ymm14,ymm14,ymm2
vpxor ymm13,ymm13,ymm1
vpxor ymm12,ymm12,ymm0
vpshufb ymm15,ymm15,ymm8
vpshufb ymm14,ymm14,ymm8
vpshufb ymm13,ymm13,ymm8
vpshufb ymm12,ymm12,ymm8
vpaddd ymm11,ymm11,ymm15
vpaddd ymm10,ymm10,ymm14
; Hash step 2 of 3: acc += 16 bytes at [rsi+rcx+16].
add r10,QWORD[((0+16))+rcx*1+rsi]
adc r11,QWORD[((8+16))+rcx*1+rsi]
adc r12,1
vpaddd ymm9,ymm9,ymm13
vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp]
vpxor ymm7,ymm7,ymm11
vpxor ymm6,ymm6,ymm10
vpxor ymm5,ymm5,ymm9
vpxor ymm4,ymm4,ymm8
vmovdqa YMMWORD[(160+128)+rbp],ymm8
vpsrld ymm8,ymm7,25
mov rdx,QWORD[((0+160+0))+rbp]
mov r15,rdx
mulx r14,r13,r10
mulx rdx,rax,r11
imul r15,r12
add r14,rax
adc r15,rdx
vpslld ymm7,ymm7,32-25
vpxor ymm7,ymm7,ymm8
vpsrld ymm8,ymm6,25
vpslld ymm6,ymm6,32-25
vpxor ymm6,ymm6,ymm8
vpsrld ymm8,ymm5,25
vpslld ymm5,ymm5,32-25
vpxor ymm5,ymm5,ymm8
vpsrld ymm8,ymm4,25
vpslld ymm4,ymm4,32-25
vpxor ymm4,ymm4,ymm8
; Reload row 2 of state 0, then rotate rows 1/2/3 of every state by 4/8/12.
vmovdqa ymm8,YMMWORD[((160+128))+rbp]
vpalignr ymm7,ymm7,ymm7,4
vpalignr ymm11,ymm11,ymm11,8
vpalignr ymm15,ymm15,ymm15,12
vpalignr ymm6,ymm6,ymm6,4
vpalignr ymm10,ymm10,ymm10,8
vpalignr ymm14,ymm14,ymm14,12
mov rdx,QWORD[((8+160+0))+rbp]
mulx rax,r10,r10
add r14,r10
mulx r9,r11,r11
adc r15,r11
adc r9,0
imul rdx,r12
vpalignr ymm5,ymm5,ymm5,4
vpalignr ymm9,ymm9,ymm9,8
vpalignr ymm13,ymm13,ymm13,12
vpalignr ymm4,ymm4,ymm4,4
vpalignr ymm8,ymm8,ymm8,8
vpalignr ymm12,ymm12,ymm12,12
; --- second half-round ---
vmovdqa YMMWORD[(160+128)+rbp],ymm8
vmovdqa ymm8,YMMWORD[$L$rol16]
vpaddd ymm3,ymm3,ymm7
vpaddd ymm2,ymm2,ymm6
vpaddd ymm1,ymm1,ymm5
vpaddd ymm0,ymm0,ymm4
vpxor ymm15,ymm15,ymm3
vpxor ymm14,ymm14,ymm2
vpxor ymm13,ymm13,ymm1
vpxor ymm12,ymm12,ymm0
vpshufb ymm15,ymm15,ymm8
vpshufb ymm14,ymm14,ymm8
add r15,rax
adc r9,rdx
vpshufb ymm13,ymm13,ymm8
vpshufb ymm12,ymm12,ymm8
vpaddd ymm11,ymm11,ymm15
vpaddd ymm10,ymm10,ymm14
vpaddd ymm9,ymm9,ymm13
vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp]
vpxor ymm7,ymm7,ymm11
vpxor ymm6,ymm6,ymm10
vpxor ymm5,ymm5,ymm9
; Reduce (hash step 2).
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
vpxor ymm4,ymm4,ymm8
vmovdqa YMMWORD[(160+128)+rbp],ymm8
vpsrld ymm8,ymm7,20
vpslld ymm7,ymm7,32-20
vpxor ymm7,ymm7,ymm8
vpsrld ymm8,ymm6,20
vpslld ymm6,ymm6,32-20
vpxor ymm6,ymm6,ymm8
; Hash step 3 of 3: acc += 16 bytes at [rsi+rcx+32]; rcx then advances by 48.
add r10,QWORD[((0+32))+rcx*1+rsi]
adc r11,QWORD[((8+32))+rcx*1+rsi]
adc r12,1
lea rcx,[48+rcx]
vpsrld ymm8,ymm5,20
vpslld ymm5,ymm5,32-20
vpxor ymm5,ymm5,ymm8
vpsrld ymm8,ymm4,20
vpslld ymm4,ymm4,32-20
vpxor ymm4,ymm4,ymm8
vmovdqa ymm8,YMMWORD[$L$rol8]
vpaddd ymm3,ymm3,ymm7
vpaddd ymm2,ymm2,ymm6
vpaddd ymm1,ymm1,ymm5
vpaddd ymm0,ymm0,ymm4
vpxor ymm15,ymm15,ymm3
vpxor ymm14,ymm14,ymm2
vpxor ymm13,ymm13,ymm1
vpxor ymm12,ymm12,ymm0
vpshufb ymm15,ymm15,ymm8
vpshufb ymm14,ymm14,ymm8
vpshufb ymm13,ymm13,ymm8
mov rdx,QWORD[((0+160+0))+rbp]
mov r15,rdx
mulx r14,r13,r10
mulx rdx,rax,r11
imul r15,r12
add r14,rax
adc r15,rdx
vpshufb ymm12,ymm12,ymm8
vpaddd ymm11,ymm11,ymm15
vpaddd ymm10,ymm10,ymm14
vpaddd ymm9,ymm9,ymm13
vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp]
vpxor ymm7,ymm7,ymm11
vpxor ymm6,ymm6,ymm10
vpxor ymm5,ymm5,ymm9
mov rdx,QWORD[((8+160+0))+rbp]
mulx rax,r10,r10
add r14,r10
mulx r9,r11,r11
adc r15,r11
adc r9,0
imul rdx,r12
vpxor ymm4,ymm4,ymm8
vmovdqa YMMWORD[(160+128)+rbp],ymm8
vpsrld ymm8,ymm7,25
vpslld ymm7,ymm7,32-25
vpxor ymm7,ymm7,ymm8
vpsrld ymm8,ymm6,25
vpslld ymm6,ymm6,32-25
vpxor ymm6,ymm6,ymm8
add r15,rax
adc r9,rdx
vpsrld ymm8,ymm5,25
vpslld ymm5,ymm5,32-25
vpxor ymm5,ymm5,ymm8
vpsrld ymm8,ymm4,25
vpslld ymm4,ymm4,32-25
vpxor ymm4,ymm4,ymm8
; Reload row 2 of state 0, then rotate rows 1/2/3 by 12/8/4 (undo).
vmovdqa ymm8,YMMWORD[((160+128))+rbp]
vpalignr ymm7,ymm7,ymm7,12
vpalignr ymm11,ymm11,ymm11,8
vpalignr ymm15,ymm15,ymm15,4
vpalignr ymm6,ymm6,ymm6,12
vpalignr ymm10,ymm10,ymm10,8
vpalignr ymm14,ymm14,ymm14,4
vpalignr ymm5,ymm5,ymm5,12
vpalignr ymm9,ymm9,ymm9,8
vpalignr ymm13,ymm13,ymm13,4
vpalignr ymm4,ymm4,ymm4,12
vpalignr ymm8,ymm8,ymm8,8
; Reduce (hash step 3).
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
vpalignr ymm12,ymm12,ymm12,4
; 10 passes x 48 bytes = 480 bytes hashed inside the round loop.
cmp rcx,10*6*8
jne NEAR $L$open_avx2_main_loop_rounds
; Add the input state back to all four states.
vpaddd ymm3,ymm3,YMMWORD[$L$chacha20_consts]
vpaddd ymm7,ymm7,YMMWORD[((160+64))+rbp]
vpaddd ymm11,ymm11,YMMWORD[((160+96))+rbp]
vpaddd ymm15,ymm15,YMMWORD[((160+256))+rbp]
vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts]
vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp]
vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp]
vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp]
vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts]
vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp]
vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp]
vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp]
vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts]
vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp]
vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp]
vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp]
; Park ymm0 in the spill slot so it can serve as a temporary below.
vmovdqa YMMWORD[(160+128)+rbp],ymm0
; Hash the block at rsi+480.
add r10,QWORD[((0+480))+rsi]
adc r11,QWORD[((8+480))+rsi]
adc r12,1
; Bytes 0..127: regroup state 3 from row-per-register into consecutive
; 64-byte blocks (low lanes first, then high lanes), XOR with input, store.
vperm2i128 ymm0,ymm7,ymm3,0x02
vperm2i128 ymm7,ymm7,ymm3,0x13
vperm2i128 ymm3,ymm15,ymm11,0x02
vperm2i128 ymm11,ymm15,ymm11,0x13
vpxor ymm0,ymm0,YMMWORD[((0+0))+rsi]
vpxor ymm3,ymm3,YMMWORD[((32+0))+rsi]
vpxor ymm7,ymm7,YMMWORD[((64+0))+rsi]
vpxor ymm11,ymm11,YMMWORD[((96+0))+rsi]
vmovdqu YMMWORD[(0+0)+rdi],ymm0
vmovdqu YMMWORD[(32+0)+rdi],ymm3
vmovdqu YMMWORD[(64+0)+rdi],ymm7
vmovdqu YMMWORD[(96+0)+rdi],ymm11
; Restore row 0 of state 0.
vmovdqa ymm0,YMMWORD[((160+128))+rbp]
; acc *= multiplier words at [rbp+160] / [rbp+168]; product in r13,r14,r15,r9.
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
imul r9,r12
add r15,r10
adc r9,rdx
; Reduce.
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
; Bytes 128..255 from state 2.
vperm2i128 ymm3,ymm6,ymm2,0x02
vperm2i128 ymm6,ymm6,ymm2,0x13
vperm2i128 ymm2,ymm14,ymm10,0x02
vperm2i128 ymm10,ymm14,ymm10,0x13
vpxor ymm3,ymm3,YMMWORD[((0+128))+rsi]
vpxor ymm2,ymm2,YMMWORD[((32+128))+rsi]
vpxor ymm6,ymm6,YMMWORD[((64+128))+rsi]
vpxor ymm10,ymm10,YMMWORD[((96+128))+rsi]
vmovdqu YMMWORD[(0+128)+rdi],ymm3
vmovdqu YMMWORD[(32+128)+rdi],ymm2
vmovdqu YMMWORD[(64+128)+rdi],ymm6
vmovdqu YMMWORD[(96+128)+rdi],ymm10
; Hash the block at rsi+496 (the last of the 32 blocks in this 512 bytes).
add r10,QWORD[((0+480+16))+rsi]
adc r11,QWORD[((8+480+16))+rsi]
adc r12,1
; Bytes 256..383 from state 1.
vperm2i128 ymm3,ymm5,ymm1,0x02
vperm2i128 ymm5,ymm5,ymm1,0x13
vperm2i128 ymm1,ymm13,ymm9,0x02
vperm2i128 ymm9,ymm13,ymm9,0x13
vpxor ymm3,ymm3,YMMWORD[((0+256))+rsi]
vpxor ymm1,ymm1,YMMWORD[((32+256))+rsi]
vpxor ymm5,ymm5,YMMWORD[((64+256))+rsi]
vpxor ymm9,ymm9,YMMWORD[((96+256))+rsi]
vmovdqu YMMWORD[(0+256)+rdi],ymm3
vmovdqu YMMWORD[(32+256)+rdi],ymm1
vmovdqu YMMWORD[(64+256)+rdi],ymm5
vmovdqu YMMWORD[(96+256)+rdi],ymm9
; acc *= multiplier; then reduce.
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
imul r9,r12
add r15,r10
adc r9,rdx
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
; Bytes 384..511 from state 0.
vperm2i128 ymm3,ymm4,ymm0,0x02
vperm2i128 ymm4,ymm4,ymm0,0x13
vperm2i128 ymm0,ymm12,ymm8,0x02
vperm2i128 ymm8,ymm12,ymm8,0x13
vpxor ymm3,ymm3,YMMWORD[((0+384))+rsi]
vpxor ymm0,ymm0,YMMWORD[((32+384))+rsi]
vpxor ymm4,ymm4,YMMWORD[((64+384))+rsi]
vpxor ymm8,ymm8,YMMWORD[((96+384))+rsi]
vmovdqu YMMWORD[(0+384)+rdi],ymm3
vmovdqu YMMWORD[(32+384)+rdi],ymm0
vmovdqu YMMWORD[(64+384)+rdi],ymm4
vmovdqu YMMWORD[(96+384)+rdi],ymm8
; Advance input/output by 512 bytes and loop.
lea rsi,[512+rsi]
lea rdi,[512+rdi]
sub rbx,16*32
jmp NEAR $L$open_avx2_main_loop
; Tail dispatch after the 512-byte loop, keyed on the remaining length rbx:
;   0                -> $L$open_sse_finalize
;   more than 384    -> $L$open_avx2_tail_512
;   more than 256    -> $L$open_avx2_tail_384
;   more than 128    -> $L$open_avx2_tail_256
;   otherwise        -> fall into the 128-byte tail set-up below.
$L$open_avx2_main_loop_done:
; The je consumes the flags of this test; vzeroupper sits in between.
test rbx,rbx
vzeroupper
je NEAR $L$open_sse_finalize
cmp rbx,12*32
ja NEAR $L$open_avx2_tail_512
cmp rbx,8*32
ja NEAR $L$open_avx2_tail_384
cmp rbx,4*32
ja NEAR $L$open_avx2_tail_256
; 128-byte tail: one two-lane state; fourth row = saved row + $L$avx2_inc,
; written back to [rbp+160+160] for the add-back after the rounds.
vmovdqa ymm0,YMMWORD[$L$chacha20_consts]
vmovdqa ymm4,YMMWORD[((160+64))+rbp]
vmovdqa ymm8,YMMWORD[((160+96))+rbp]
vmovdqa ymm12,YMMWORD[$L$avx2_inc]
vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp]
vmovdqa YMMWORD[(160+160)+rbp],ymm12
; r8 = bytes hashed so far / pass counter in steps of 16;
; rcx = remaining length rounded down to a multiple of 16.
xor r8,r8
mov rcx,rbx
and rcx,-16
test rcx,rcx
; No whole block to hash: skip straight to the round-only entry.
je NEAR $L$open_avx2_tail_128_rounds
; Hash step of the 128-byte AVX2 open tail: absorbs the 16 bytes at
; [rsi+r8] into the accumulator r10:r11:r12, then falls through into
; $L$open_avx2_tail_128_rounds (which advances r8 by 16).
$L$open_avx2_tail_128_rounds_and_x1hash:
; acc += 16 bytes at [rsi+r8], 1 (plus carry) into the top limb.
add r10,QWORD[((0+0))+r8*1+rsi]
adc r11,QWORD[((8+0))+r8*1+rsi]
adc r12,1
; acc *= multiplier words at [rbp+160] / [rbp+168]; product in r13,r14,r15,r9.
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
imul r9,r12
add r15,r10
adc r9,rdx
; Reduce: keep low 130 bits, add back 5 * (bits above 2^130).
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
; Round step of the 128-byte AVX2 open tail: two half-rounds on the state
; ymm0/4/8/12 per pass, with r8 advancing by 16 each time. While r8 is below
; rcx the next pass is preceded by a hash step; passes continue until r8
; reaches 160 (10 passes). Then the input state is added back and the lanes
; are regrouped before jumping to $L$open_avx2_tail_128_xor.
$L$open_avx2_tail_128_rounds:
add r8,16
; --- first half-round ---
vpaddd ymm0,ymm0,ymm4
vpxor ymm12,ymm12,ymm0
vpshufb ymm12,ymm12,YMMWORD[$L$rol16]
vpaddd ymm8,ymm8,ymm12
vpxor ymm4,ymm4,ymm8
vpsrld ymm3,ymm4,20
vpslld ymm4,ymm4,12
vpxor ymm4,ymm4,ymm3
vpaddd ymm0,ymm0,ymm4
vpxor ymm12,ymm12,ymm0
vpshufb ymm12,ymm12,YMMWORD[$L$rol8]
vpaddd ymm8,ymm8,ymm12
vpxor ymm4,ymm4,ymm8
vpslld ymm3,ymm4,7
vpsrld ymm4,ymm4,25
vpxor ymm4,ymm4,ymm3
vpalignr ymm12,ymm12,ymm12,12
vpalignr ymm8,ymm8,ymm8,8
vpalignr ymm4,ymm4,ymm4,4
; --- second half-round ---
vpaddd ymm0,ymm0,ymm4
vpxor ymm12,ymm12,ymm0
vpshufb ymm12,ymm12,YMMWORD[$L$rol16]
vpaddd ymm8,ymm8,ymm12
vpxor ymm4,ymm4,ymm8
vpsrld ymm3,ymm4,20
vpslld ymm4,ymm4,12
vpxor ymm4,ymm4,ymm3
vpaddd ymm0,ymm0,ymm4
vpxor ymm12,ymm12,ymm0
vpshufb ymm12,ymm12,YMMWORD[$L$rol8]
vpaddd ymm8,ymm8,ymm12
vpxor ymm4,ymm4,ymm8
vpslld ymm3,ymm4,7
vpsrld ymm4,ymm4,25
vpxor ymm4,ymm4,ymm3
vpalignr ymm12,ymm12,ymm12,4
vpalignr ymm8,ymm8,ymm8,8
vpalignr ymm4,ymm4,ymm4,12
; More whole blocks to hash: next pass includes a hash step.
cmp r8,rcx
jb NEAR $L$open_avx2_tail_128_rounds_and_x1hash
; Otherwise finish the remaining round-only passes (160 / 16 = 10 in total).
cmp r8,160
jne NEAR $L$open_avx2_tail_128_rounds
; Add the input state back.
vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts]
vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp]
vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp]
vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp]
; Regroup rows into consecutive 32-byte pieces:
;   ymm0 = { row0.low, row1.low },  ymm4  = { row2.low, row3.low },
;   ymm8 = { row0.high, row1.high } (via ymm3), ymm12 = { row2.high, row3.high }.
vperm2i128 ymm3,ymm4,ymm0,0x13
vperm2i128 ymm0,ymm4,ymm0,0x02
vperm2i128 ymm4,ymm12,ymm8,0x02
vperm2i128 ymm12,ymm12,ymm8,0x13
vmovdqa ymm8,ymm3
jmp NEAR $L$open_avx2_tail_128_xor
; Set-up for the 256-byte AVX2 open tail (reached when more than 128 bytes
; remain). Builds two two-lane states (ymm0/4/8/12 and ymm1/5/9/13) whose
; fourth rows are the saved row plus 2x and 1x $L$avx2_inc respectively, and
; stores those rows at [rbp+160+160] and [rbp+160+192].
$L$open_avx2_tail_256:
vmovdqa ymm0,YMMWORD[$L$chacha20_consts]
vmovdqa ymm4,YMMWORD[((160+64))+rbp]
vmovdqa ymm8,YMMWORD[((160+96))+rbp]
vmovdqa ymm1,ymm0
vmovdqa ymm5,ymm4
vmovdqa ymm9,ymm8
vmovdqa ymm12,YMMWORD[$L$avx2_inc]
vpaddd ymm13,ymm12,YMMWORD[((160+160))+rbp]
vpaddd ymm12,ymm12,ymm13
vmovdqa YMMWORD[(160+160)+rbp],ymm12
vmovdqa YMMWORD[(160+192)+rbp],ymm13
; Park the remaining length in the spill slot; rbx is reused as the hash
; read pointer below.
mov QWORD[((160+128))+rbp],rbx
; rcx = number of passes that include a hash step:
; (remaining - 128) / 16, capped at 10 (signed compare).
mov rcx,rbx
sub rcx,4*32
shr rcx,4
mov r8,10
cmp rcx,10
cmovg rcx,r8
mov rbx,rsi
; r8 = pass counter.
xor r8,r8
; Hash step of the 256-byte AVX2 open tail: absorbs the 16 bytes at [rbx]
; into the accumulator r10:r11:r12 using mulx, advances rbx by 16, then
; falls through into $L$open_avx2_tail_256_rounds.
$L$open_avx2_tail_256_rounds_and_x1hash:
; acc += 16 bytes at [rbx], 1 (plus carry) into the top limb.
add r10,QWORD[((0+0))+rbx]
adc r11,QWORD[((8+0))+rbx]
adc r12,1
; acc *= multiplier word at [rbp+160] ...
mov rdx,QWORD[((0+160+0))+rbp]
mov r15,rdx
mulx r14,r13,r10
mulx rdx,rax,r11
imul r15,r12
add r14,rax
adc r15,rdx
; ... and multiplier word at [rbp+168]; product in r13,r14,r15,r9.
mov rdx,QWORD[((8+160+0))+rbp]
mulx rax,r10,r10
add r14,r10
mulx r9,r11,r11
adc r15,r11
adc r9,0
imul rdx,r12
add r15,rax
adc r9,rdx
; Reduce: keep low 130 bits, add back 5 * (bits above 2^130).
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
lea rbx,[16+rbx]
; Round step of the 256-byte AVX2 open tail: two half-rounds per pass on the
; states ymm0/4/8/12 and ymm1/5/9/13 (ymm3 is scratch), with r8 counting
; passes. While r8 is below rcx the next pass is preceded by a hash step;
; passes continue until r8 reaches 10. Afterwards r8/rcx/rbx are set up for
; the hash catch-up loop that follows.
$L$open_avx2_tail_256_rounds:
; --- first half-round, state 0 ---
vpaddd ymm0,ymm0,ymm4
vpxor ymm12,ymm12,ymm0
vpshufb ymm12,ymm12,YMMWORD[$L$rol16]
vpaddd ymm8,ymm8,ymm12
vpxor ymm4,ymm4,ymm8
vpsrld ymm3,ymm4,20
vpslld ymm4,ymm4,12
vpxor ymm4,ymm4,ymm3
vpaddd ymm0,ymm0,ymm4
vpxor ymm12,ymm12,ymm0
vpshufb ymm12,ymm12,YMMWORD[$L$rol8]
vpaddd ymm8,ymm8,ymm12
vpxor ymm4,ymm4,ymm8
vpslld ymm3,ymm4,7
vpsrld ymm4,ymm4,25
vpxor ymm4,ymm4,ymm3
vpalignr ymm12,ymm12,ymm12,12
vpalignr ymm8,ymm8,ymm8,8
vpalignr ymm4,ymm4,ymm4,4
; --- first half-round, state 1 ---
vpaddd ymm1,ymm1,ymm5
vpxor ymm13,ymm13,ymm1
vpshufb ymm13,ymm13,YMMWORD[$L$rol16]
vpaddd ymm9,ymm9,ymm13
vpxor ymm5,ymm5,ymm9
vpsrld ymm3,ymm5,20
vpslld ymm5,ymm5,12
vpxor ymm5,ymm5,ymm3
vpaddd ymm1,ymm1,ymm5
vpxor ymm13,ymm13,ymm1
vpshufb ymm13,ymm13,YMMWORD[$L$rol8]
vpaddd ymm9,ymm9,ymm13
vpxor ymm5,ymm5,ymm9
vpslld ymm3,ymm5,7
vpsrld ymm5,ymm5,25
vpxor ymm5,ymm5,ymm3
vpalignr ymm13,ymm13,ymm13,12
vpalignr ymm9,ymm9,ymm9,8
vpalignr ymm5,ymm5,ymm5,4
inc r8
; --- second half-round, state 0 ---
vpaddd ymm0,ymm0,ymm4
vpxor ymm12,ymm12,ymm0
vpshufb ymm12,ymm12,YMMWORD[$L$rol16]
vpaddd ymm8,ymm8,ymm12
vpxor ymm4,ymm4,ymm8
vpsrld ymm3,ymm4,20
vpslld ymm4,ymm4,12
vpxor ymm4,ymm4,ymm3
vpaddd ymm0,ymm0,ymm4
vpxor ymm12,ymm12,ymm0
vpshufb ymm12,ymm12,YMMWORD[$L$rol8]
vpaddd ymm8,ymm8,ymm12
vpxor ymm4,ymm4,ymm8
vpslld ymm3,ymm4,7
vpsrld ymm4,ymm4,25
vpxor ymm4,ymm4,ymm3
vpalignr ymm12,ymm12,ymm12,4
vpalignr ymm8,ymm8,ymm8,8
vpalignr ymm4,ymm4,ymm4,12
; --- second half-round, state 1 ---
vpaddd ymm1,ymm1,ymm5
vpxor ymm13,ymm13,ymm1
vpshufb ymm13,ymm13,YMMWORD[$L$rol16]
vpaddd ymm9,ymm9,ymm13
vpxor ymm5,ymm5,ymm9
vpsrld ymm3,ymm5,20
vpslld ymm5,ymm5,12
vpxor ymm5,ymm5,ymm3
vpaddd ymm1,ymm1,ymm5
vpxor ymm13,ymm13,ymm1
vpshufb ymm13,ymm13,YMMWORD[$L$rol8]
vpaddd ymm9,ymm9,ymm13
vpxor ymm5,ymm5,ymm9
vpslld ymm3,ymm5,7
vpsrld ymm5,ymm5,25
vpxor ymm5,ymm5,ymm3
vpalignr ymm13,ymm13,ymm13,4
vpalignr ymm9,ymm9,ymm9,8
vpalignr ymm5,ymm5,ymm5,12
; NOTE(review): the sequence below operates on ymm2/6/10/14, which the
; visible 256-byte tail set-up does not initialise and whose results the
; visible finalisation does not read. It looks like leftover generator
; output with no effect on this path -- confirm against the generating
; Perl script before relying on or removing it.
vpaddd ymm2,ymm2,ymm6
vpxor ymm14,ymm14,ymm2
vpshufb ymm14,ymm14,YMMWORD[$L$rol16]
vpaddd ymm10,ymm10,ymm14
vpxor ymm6,ymm6,ymm10
vpsrld ymm3,ymm6,20
vpslld ymm6,ymm6,12
vpxor ymm6,ymm6,ymm3
vpaddd ymm2,ymm2,ymm6
vpxor ymm14,ymm14,ymm2
vpshufb ymm14,ymm14,YMMWORD[$L$rol8]
vpaddd ymm10,ymm10,ymm14
vpxor ymm6,ymm6,ymm10
vpslld ymm3,ymm6,7
vpsrld ymm6,ymm6,25
vpxor ymm6,ymm6,ymm3
vpalignr ymm14,ymm14,ymm14,4
vpalignr ymm10,ymm10,ymm10,8
vpalignr ymm6,ymm6,ymm6,12
; More blocks scheduled for hashing: next pass includes a hash step.
cmp r8,rcx
jb NEAR $L$open_avx2_tail_256_rounds_and_x1hash
cmp r8,10
jne NEAR $L$open_avx2_tail_256_rounds
; r8 = hash read pointer, rcx = bytes hashed so far (pointer minus rsi),
; rbx = remaining length restored from the spill slot.
mov r8,rbx
sub rbx,rsi
mov rcx,rbx
mov rbx,QWORD[((160+128))+rbp]
; Hash catch-up loop of the 256-byte AVX2 open tail: keeps absorbing 16-byte
; blocks at [r8] into the accumulator r10:r11:r12 for as long as another
; whole block fits within the rbx remaining bytes (rcx tracks bytes hashed,
; compared signed). Exits to $L$open_avx2_tail_256_done.
$L$open_avx2_tail_256_hash:
add rcx,16
cmp rcx,rbx
jg NEAR $L$open_avx2_tail_256_done
; acc += 16 bytes at [r8], 1 (plus carry) into the top limb.
add r10,QWORD[((0+0))+r8]
adc r11,QWORD[((8+0))+r8]
adc r12,1
; acc *= multiplier word at [rbp+160] ...
mov rdx,QWORD[((0+160+0))+rbp]
mov r15,rdx
mulx r14,r13,r10
mulx rdx,rax,r11
imul r15,r12
add r14,rax
adc r15,rdx
; ... and multiplier word at [rbp+168]; product in r13,r14,r15,r9.
mov rdx,QWORD[((8+160+0))+rbp]
mulx rax,r10,r10
add r14,r10
mulx r9,r11,r11
adc r15,r11
adc r9,0
imul rdx,r12
add r15,rax
adc r9,rdx
; Reduce: keep low 130 bits, add back 5 * (bits above 2^130).
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
lea r8,[16+r8]
jmp NEAR $L$open_avx2_tail_256_hash
; Finalisation of the 256-byte AVX2 open tail: adds the input state back to
; both states, XORs the first 128 input bytes with state 1 and stores them,
; regroups state 0 into ymm0/ymm4/ymm8/ymm12 for the remaining bytes,
; advances rsi/rdi by 128, subtracts 128 from rbx and jumps to
; $L$open_avx2_tail_128_xor.
$L$open_avx2_tail_256_done:
vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts]
vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp]
vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp]
vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp]
vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts]
vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp]
vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp]
vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp]
; State 1: low lanes form the first 64 bytes, high lanes the next 64.
vperm2i128 ymm3,ymm5,ymm1,0x02
vperm2i128 ymm5,ymm5,ymm1,0x13
vperm2i128 ymm1,ymm13,ymm9,0x02
vperm2i128 ymm9,ymm13,ymm9,0x13
vpxor ymm3,ymm3,YMMWORD[((0+0))+rsi]
vpxor ymm1,ymm1,YMMWORD[((32+0))+rsi]
vpxor ymm5,ymm5,YMMWORD[((64+0))+rsi]
vpxor ymm9,ymm9,YMMWORD[((96+0))+rsi]
vmovdqu YMMWORD[(0+0)+rdi],ymm3
vmovdqu YMMWORD[(32+0)+rdi],ymm1
vmovdqu YMMWORD[(64+0)+rdi],ymm5
vmovdqu YMMWORD[(96+0)+rdi],ymm9
; State 0 regrouped the same way as at the end of the 128-byte tail:
;   ymm0 = { row0.low, row1.low },  ymm4  = { row2.low, row3.low },
;   ymm8 = { row0.high, row1.high } (via ymm3), ymm12 = { row2.high, row3.high }.
vperm2i128 ymm3,ymm4,ymm0,0x13
vperm2i128 ymm0,ymm4,ymm0,0x02
vperm2i128 ymm4,ymm12,ymm8,0x02
vperm2i128 ymm12,ymm12,ymm8,0x13
vmovdqa ymm8,ymm3
lea rsi,[128+rsi]
lea rdi,[128+rdi]
sub rbx,4*32
jmp NEAR $L$open_avx2_tail_128_xor
; Tail path that generates three 2-block ChaCha20 states (384 bytes of
; keystream). This label only performs the setup; the rounds follow below.
$L$open_avx2_tail_384:
; Three copies of rows a/b/c: constants plus the rows saved at
; [160+64+rbp] and [160+96+rbp].
vmovdqa ymm0,YMMWORD[$L$chacha20_consts]
vmovdqa ymm4,YMMWORD[((160+64))+rbp]
vmovdqa ymm8,YMMWORD[((160+96))+rbp]
vmovdqa ymm1,ymm0
vmovdqa ymm5,ymm4
vmovdqa ymm9,ymm8
vmovdqa ymm2,ymm0
vmovdqa ymm6,ymm4
vmovdqa ymm10,ymm8
; Counter rows: ymm14 = saved counter + inc, ymm13 = ymm14 + inc,
; ymm12 = ymm13 + inc, where inc is $L$avx2_inc (2 per 128-bit lane).
; All three are saved for the feed-forward addition after the rounds, and
; the highest one becomes the new saved counter at [160+160+rbp].
vmovdqa ymm12,YMMWORD[$L$avx2_inc]
vpaddd ymm14,ymm12,YMMWORD[((160+160))+rbp]
vpaddd ymm13,ymm12,ymm14
vpaddd ymm12,ymm12,ymm13
vmovdqa YMMWORD[(160+160)+rbp],ymm12
vmovdqa YMMWORD[(160+192)+rbp],ymm13
vmovdqa YMMWORD[(160+224)+rbp],ymm14
; Save the remaining length; rbx is reused as the hash read pointer below.
mov QWORD[((160+128))+rbp],rbx
; rcx = min(((len - 256) >> 4) + 6, 10): number of loop iterations that
; enter through the x2hash label (signed compare via cmovg).
mov rcx,rbx
sub rcx,8*32
shr rcx,4
add rcx,6
mov r8,10
cmp rcx,10
cmovg rcx,r8
; rbx = hash read pointer, r8 = iteration counter starting at 0.
mov rbx,rsi
xor r8,r8
; Loop entry used while r8 < rcx: hashes one extra 16-byte block at [rbx]
; and then falls through into the rounds body below, which hashes another.
; Accumulator is r10:r11:r12; multiplier qwords are at [160+rbp], [168+rbp].
$L$open_avx2_tail_384_rounds_and_x2hash:
; acc += block, plus 1 in the third limb (bit 128).
add r10,QWORD[((0+0))+rbx]
adc r11,QWORD[((8+0))+rbx]
adc r12,1
; acc * low multiplier qword (mulx leaves flags untouched).
mov rdx,QWORD[((0+160+0))+rbp]
mov r15,rdx
mulx r14,r13,r10
mulx rdx,rax,r11
imul r15,r12
add r14,rax
adc r15,rdx
; acc * high multiplier qword.
mov rdx,QWORD[((8+160+0))+rbp]
mulx rax,r10,r10
add r14,r10
mulx r9,r11,r11
adc r15,r11
adc r9,0
imul rdx,r12
add r15,rax
adc r9,rdx
; Reduce: low 130 bits plus 5 * (bits above 2^130).
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
lea rbx,[16+rbx]
; One ChaCha20 double round over three 2-block states, with one Poly1305
; block from [rbx] hashed between the two half rounds.
; States (rows a,b,c,d): (ymm2,ymm6,ymm10,ymm14), (ymm1,ymm5,ymm9,ymm13),
; (ymm0,ymm4,ymm8,ymm12). ymm3 is the scratch register for the rotations.
; r8 counts iterations; the loop ends when r8 reaches 10.
$L$open_avx2_tail_384_rounds_and_x1hash:
; --- first half round, state 2 ---
; a += b; d ^= a; d <<<= 16 (byte shuffle); c += d; b ^= c; b <<<= 12
vpaddd ymm2,ymm2,ymm6
vpxor ymm14,ymm14,ymm2
vpshufb ymm14,ymm14,YMMWORD[$L$rol16]
vpaddd ymm10,ymm10,ymm14
vpxor ymm6,ymm6,ymm10
vpsrld ymm3,ymm6,20
vpslld ymm6,ymm6,12
vpxor ymm6,ymm6,ymm3
; a += b; d ^= a; d <<<= 8 (byte shuffle); c += d; b ^= c; b <<<= 7
vpaddd ymm2,ymm2,ymm6
vpxor ymm14,ymm14,ymm2
vpshufb ymm14,ymm14,YMMWORD[$L$rol8]
vpaddd ymm10,ymm10,ymm14
vpxor ymm6,ymm6,ymm10
vpslld ymm3,ymm6,7
vpsrld ymm6,ymm6,25
vpxor ymm6,ymm6,ymm3
; Rotate rows d, c, b within each 128-bit lane by 12, 8 and 4 bytes so the
; next half round works on the other set of word groupings.
vpalignr ymm14,ymm14,ymm14,12
vpalignr ymm10,ymm10,ymm10,8
vpalignr ymm6,ymm6,ymm6,4
; --- first half round, state 1 ---
vpaddd ymm1,ymm1,ymm5
vpxor ymm13,ymm13,ymm1
vpshufb ymm13,ymm13,YMMWORD[$L$rol16]
vpaddd ymm9,ymm9,ymm13
vpxor ymm5,ymm5,ymm9
vpsrld ymm3,ymm5,20
vpslld ymm5,ymm5,12
vpxor ymm5,ymm5,ymm3
vpaddd ymm1,ymm1,ymm5
vpxor ymm13,ymm13,ymm1
vpshufb ymm13,ymm13,YMMWORD[$L$rol8]
vpaddd ymm9,ymm9,ymm13
vpxor ymm5,ymm5,ymm9
vpslld ymm3,ymm5,7
vpsrld ymm5,ymm5,25
vpxor ymm5,ymm5,ymm3
vpalignr ymm13,ymm13,ymm13,12
vpalignr ymm9,ymm9,ymm9,8
vpalignr ymm5,ymm5,ymm5,4
; --- first half round, state 0 ---
vpaddd ymm0,ymm0,ymm4
vpxor ymm12,ymm12,ymm0
vpshufb ymm12,ymm12,YMMWORD[$L$rol16]
vpaddd ymm8,ymm8,ymm12
vpxor ymm4,ymm4,ymm8
vpsrld ymm3,ymm4,20
vpslld ymm4,ymm4,12
vpxor ymm4,ymm4,ymm3
vpaddd ymm0,ymm0,ymm4
vpxor ymm12,ymm12,ymm0
vpshufb ymm12,ymm12,YMMWORD[$L$rol8]
vpaddd ymm8,ymm8,ymm12
vpxor ymm4,ymm4,ymm8
vpslld ymm3,ymm4,7
vpsrld ymm4,ymm4,25
vpxor ymm4,ymm4,ymm3
vpalignr ymm12,ymm12,ymm12,12
vpalignr ymm8,ymm8,ymm8,8
vpalignr ymm4,ymm4,ymm4,4
; --- Poly1305: absorb one 16-byte block from [rbx] (mul-based variant) ---
; acc (r10:r11:r12) += block, plus 1 in the third limb (bit 128).
add r10,QWORD[((0+0))+rbx]
adc r11,QWORD[((8+0))+rbx]
adc r12,1
; acc * low multiplier qword at [160+rbp] -> r13:r14:r15.
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
; acc * high multiplier qword at [168+rbp], accumulated into r14:r15:r9.
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
imul r9,r12
add r15,r10
adc r9,rdx
; Reduce: low 130 bits plus 5 * (bits above 2^130).
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
lea rbx,[16+rbx]
; One more double round accounted for.
inc r8
; --- second half round, state 2 ---
vpaddd ymm2,ymm2,ymm6
vpxor ymm14,ymm14,ymm2
vpshufb ymm14,ymm14,YMMWORD[$L$rol16]
vpaddd ymm10,ymm10,ymm14
vpxor ymm6,ymm6,ymm10
vpsrld ymm3,ymm6,20
vpslld ymm6,ymm6,12
vpxor ymm6,ymm6,ymm3
vpaddd ymm2,ymm2,ymm6
vpxor ymm14,ymm14,ymm2
vpshufb ymm14,ymm14,YMMWORD[$L$rol8]
vpaddd ymm10,ymm10,ymm14
vpxor ymm6,ymm6,ymm10
vpslld ymm3,ymm6,7
vpsrld ymm6,ymm6,25
vpxor ymm6,ymm6,ymm3
; Undo the lane rotation applied after the first half round (4/8/12 here
; complements the 12/8/4 above).
vpalignr ymm14,ymm14,ymm14,4
vpalignr ymm10,ymm10,ymm10,8
vpalignr ymm6,ymm6,ymm6,12
; --- second half round, state 1 ---
vpaddd ymm1,ymm1,ymm5
vpxor ymm13,ymm13,ymm1
vpshufb ymm13,ymm13,YMMWORD[$L$rol16]
vpaddd ymm9,ymm9,ymm13
vpxor ymm5,ymm5,ymm9
vpsrld ymm3,ymm5,20
vpslld ymm5,ymm5,12
vpxor ymm5,ymm5,ymm3
vpaddd ymm1,ymm1,ymm5
vpxor ymm13,ymm13,ymm1
vpshufb ymm13,ymm13,YMMWORD[$L$rol8]
vpaddd ymm9,ymm9,ymm13
vpxor ymm5,ymm5,ymm9
vpslld ymm3,ymm5,7
vpsrld ymm5,ymm5,25
vpxor ymm5,ymm5,ymm3
vpalignr ymm13,ymm13,ymm13,4
vpalignr ymm9,ymm9,ymm9,8
vpalignr ymm5,ymm5,ymm5,12
; --- second half round, state 0 ---
vpaddd ymm0,ymm0,ymm4
vpxor ymm12,ymm12,ymm0
vpshufb ymm12,ymm12,YMMWORD[$L$rol16]
vpaddd ymm8,ymm8,ymm12
vpxor ymm4,ymm4,ymm8
vpsrld ymm3,ymm4,20
vpslld ymm4,ymm4,12
vpxor ymm4,ymm4,ymm3
vpaddd ymm0,ymm0,ymm4
vpxor ymm12,ymm12,ymm0
vpshufb ymm12,ymm12,YMMWORD[$L$rol8]
vpaddd ymm8,ymm8,ymm12
vpxor ymm4,ymm4,ymm8
vpslld ymm3,ymm4,7
vpsrld ymm4,ymm4,25
vpxor ymm4,ymm4,ymm3
vpalignr ymm12,ymm12,ymm12,4
vpalignr ymm8,ymm8,ymm8,8
vpalignr ymm4,ymm4,ymm4,12
; Loop control: iterations with r8 < rcx re-enter through the x2hash label
; (two hash blocks per iteration); the rest, up to 10 in total, hash one.
cmp r8,rcx
jb NEAR $L$open_avx2_tail_384_rounds_and_x2hash
cmp r8,10
jne NEAR $L$open_avx2_tail_384_rounds_and_x1hash
; Set up the catch-up hash loop: r8 = next block to hash,
; rcx = bytes hashed so far (rbx - rsi), rbx = length saved at [160+128+rbp].
mov r8,rbx
sub rbx,rsi
mov rcx,rbx
mov rbx,QWORD[((160+128))+rbp]
; Poly1305 catch-up loop for the 384-byte tail path.
;   r8  = pointer to the next 16-byte block to hash
;   rcx = number of bytes already hashed
;   rbx = tail length reloaded from [160+128+rbp]
; Accumulator is r10:r11:r12; multiplier qwords are at [160+rbp], [168+rbp].
$L$open_avx2_384_tail_hash:
; Stop once hashing another 16 bytes would pass rbx (signed compare).
add rcx,16
cmp rcx,rbx
jg NEAR $L$open_avx2_384_tail_done
; acc += block, plus 1 in the third limb (bit 128).
add r10,QWORD[((0+0))+r8]
adc r11,QWORD[((8+0))+r8]
adc r12,1
; acc * low multiplier qword -> r13:r14:r15.
mov rdx,QWORD[((0+160+0))+rbp]
mov r15,rdx
mulx r14,r13,r10
mulx rdx,rax,r11
imul r15,r12
add r14,rax
adc r15,rdx
; acc * high multiplier qword, accumulated into r14:r15:r9.
mov rdx,QWORD[((8+160+0))+rbp]
mulx rax,r10,r10
add r14,r10
mulx r9,r11,r11
adc r15,r11
adc r9,0
imul rdx,r12
add r15,rax
adc r9,rdx
; Reduce: low 130 bits plus 5 * (bits above 2^130).
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
lea r8,[16+r8]
jmp NEAR $L$open_avx2_384_tail_hash
; Finalise the three 2-block states of the 384-byte tail path, XOR the first
; 256 bytes at rsi into rdi and leave the last 128 bytes of keystream in
; ymm0, ymm4, ymm8, ymm12 for $L$open_avx2_tail_128_xor.
$L$open_avx2_384_tail_done:
; Feed-forward: add the saved input rows. The counter rows come from
; [160+224+rbp] (ymm14), [160+192+rbp] (ymm13) and [160+160+rbp] (ymm12),
; matching the stores made in the tail_384 setup.
vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts]
vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp]
vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp]
vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp]
vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts]
vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp]
vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp]
vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp]
vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts]
vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp]
vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp]
vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp]
; State 2 -> bytes 0..127. Selector 0x02 pairs the low 128-bit lanes,
; 0x13 pairs the high lanes.
vperm2i128 ymm3,ymm6,ymm2,0x02
vperm2i128 ymm6,ymm6,ymm2,0x13
vperm2i128 ymm2,ymm14,ymm10,0x02
vperm2i128 ymm10,ymm14,ymm10,0x13
vpxor ymm3,ymm3,YMMWORD[((0+0))+rsi]
vpxor ymm2,ymm2,YMMWORD[((32+0))+rsi]
vpxor ymm6,ymm6,YMMWORD[((64+0))+rsi]
vpxor ymm10,ymm10,YMMWORD[((96+0))+rsi]
vmovdqu YMMWORD[(0+0)+rdi],ymm3
vmovdqu YMMWORD[(32+0)+rdi],ymm2
vmovdqu YMMWORD[(64+0)+rdi],ymm6
vmovdqu YMMWORD[(96+0)+rdi],ymm10
; State 1 -> bytes 128..255.
vperm2i128 ymm3,ymm5,ymm1,0x02
vperm2i128 ymm5,ymm5,ymm1,0x13
vperm2i128 ymm1,ymm13,ymm9,0x02
vperm2i128 ymm9,ymm13,ymm9,0x13
vpxor ymm3,ymm3,YMMWORD[((0+128))+rsi]
vpxor ymm1,ymm1,YMMWORD[((32+128))+rsi]
vpxor ymm5,ymm5,YMMWORD[((64+128))+rsi]
vpxor ymm9,ymm9,YMMWORD[((96+128))+rsi]
vmovdqu YMMWORD[(0+128)+rdi],ymm3
vmovdqu YMMWORD[(32+128)+rdi],ymm1
vmovdqu YMMWORD[(64+128)+rdi],ymm5
vmovdqu YMMWORD[(96+128)+rdi],ymm9
; State 0 -> keystream queue ymm0, ymm4, ymm8, ymm12 (ymm3 is a temporary).
vperm2i128 ymm3,ymm4,ymm0,0x13
vperm2i128 ymm0,ymm4,ymm0,0x02
vperm2i128 ymm4,ymm12,ymm8,0x02
vperm2i128 ymm12,ymm12,ymm8,0x13
vmovdqa ymm8,ymm3
; 256 bytes consumed.
lea rsi,[256+rsi]
lea rdi,[256+rdi]
sub rbx,8*32
jmp NEAR $L$open_avx2_tail_128_xor
; Tail path that generates four 2-block ChaCha20 states (512 bytes of
; keystream). This label only performs the setup; the rounds follow below.
$L$open_avx2_tail_512:
; Four copies of rows a/b/c: constants plus the rows saved at
; [160+64+rbp] and [160+96+rbp].
vmovdqa ymm0,YMMWORD[$L$chacha20_consts]
vmovdqa ymm4,YMMWORD[((160+64))+rbp]
vmovdqa ymm8,YMMWORD[((160+96))+rbp]
vmovdqa ymm1,ymm0
vmovdqa ymm5,ymm4
vmovdqa ymm9,ymm8
vmovdqa ymm2,ymm0
vmovdqa ymm6,ymm4
vmovdqa ymm10,ymm8
vmovdqa ymm3,ymm0
vmovdqa ymm7,ymm4
vmovdqa ymm11,ymm8
; Counter rows: ymm15 = saved counter + inc, then ymm14, ymm13, ymm12 each
; one further inc ($L$avx2_inc adds 2 per 128-bit lane). All four are saved
; for the feed-forward addition; ymm12 becomes the new saved counter.
vmovdqa ymm12,YMMWORD[$L$avx2_inc]
vpaddd ymm15,ymm12,YMMWORD[((160+160))+rbp]
vpaddd ymm14,ymm12,ymm15
vpaddd ymm13,ymm12,ymm14
vpaddd ymm12,ymm12,ymm13
vmovdqa YMMWORD[(160+256)+rbp],ymm15
vmovdqa YMMWORD[(160+224)+rbp],ymm14
vmovdqa YMMWORD[(160+192)+rbp],ymm13
vmovdqa YMMWORD[(160+160)+rbp],ymm12
; rcx = iteration counter, r8 = hash read pointer.
xor rcx,rcx
mov r8,rsi
; Loop entry used for the first four iterations (rcx < 4): hashes one extra
; 16-byte block at [r8] before falling through into the rounds body, which
; hashes two more. Accumulator is r10:r11:r12; multiplier qwords are at
; [160+rbp] and [168+rbp].
$L$open_avx2_tail_512_rounds_and_x2hash:
; acc += block, plus 1 in the third limb (bit 128).
add r10,QWORD[((0+0))+r8]
adc r11,QWORD[((8+0))+r8]
adc r12,1
; acc * low multiplier qword -> r13:r14:r15.
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
; acc * high multiplier qword, accumulated into r14:r15:r9.
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
imul r9,r12
add r15,r10
adc r9,rdx
; Reduce: low 130 bits plus 5 * (bits above 2^130).
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
lea r8,[16+r8]
; One ChaCha20 double round over four 2-block states, interleaved with two
; Poly1305 blocks read from [r8] and [r8+16].
; States (rows a,b,c,d): (ymm3,ymm7,ymm11,ymm15), (ymm2,ymm6,ymm10,ymm14),
; (ymm1,ymm5,ymm9,ymm13), (ymm0,ymm4,ymm8,ymm12).
; All sixteen ymm registers hold state, so row c of the last state (ymm8) is
; spilled to [160+128+rbp] whenever ymm8 is needed as a shuffle mask or as a
; shift temporary.
$L$open_avx2_tail_512_rounds_and_x1hash:
; --- first half round: a += b; d ^= a; d <<<= 16 ---
vmovdqa YMMWORD[(160+128)+rbp],ymm8
vmovdqa ymm8,YMMWORD[$L$rol16]
vpaddd ymm3,ymm3,ymm7
vpaddd ymm2,ymm2,ymm6
vpaddd ymm1,ymm1,ymm5
vpaddd ymm0,ymm0,ymm4
vpxor ymm15,ymm15,ymm3
vpxor ymm14,ymm14,ymm2
vpxor ymm13,ymm13,ymm1
vpxor ymm12,ymm12,ymm0
vpshufb ymm15,ymm15,ymm8
vpshufb ymm14,ymm14,ymm8
vpshufb ymm13,ymm13,ymm8
vpshufb ymm12,ymm12,ymm8
; c += d (the spilled row c is added straight from memory); b ^= c
vpaddd ymm11,ymm11,ymm15
vpaddd ymm10,ymm10,ymm14
vpaddd ymm9,ymm9,ymm13
vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp]
vpxor ymm7,ymm7,ymm11
vpxor ymm6,ymm6,ymm10
vpxor ymm5,ymm5,ymm9
vpxor ymm4,ymm4,ymm8
; b <<<= 12, using ymm8 as the temporary after spilling row c again.
vmovdqa YMMWORD[(160+128)+rbp],ymm8
vpsrld ymm8,ymm7,20
vpslld ymm7,ymm7,32-20
vpxor ymm7,ymm7,ymm8
vpsrld ymm8,ymm6,20
vpslld ymm6,ymm6,32-20
vpxor ymm6,ymm6,ymm8
vpsrld ymm8,ymm5,20
vpslld ymm5,ymm5,32-20
vpxor ymm5,ymm5,ymm8
vpsrld ymm8,ymm4,20
vpslld ymm4,ymm4,32-20
vpxor ymm4,ymm4,ymm8
vmovdqa ymm8,YMMWORD[$L$rol8]
vpaddd ymm3,ymm3,ymm7
; --- Poly1305: absorb the 16-byte block at [r8] ---
; acc (r10:r11:r12) += block, plus 1 in the third limb (bit 128).
add r10,QWORD[((0+0))+r8]
adc r11,QWORD[((8+0))+r8]
adc r12,1
; acc * the two multiplier qwords at [160+rbp] and [168+rbp].
mov rdx,QWORD[((0+160+0))+rbp]
mov r15,rdx
mulx r14,r13,r10
mulx rdx,rax,r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rdx,QWORD[((8+160+0))+rbp]
mulx rax,r10,r10
add r14,r10
mulx r9,r11,r11
adc r15,r11
adc r9,0
imul rdx,r12
add r15,rax
adc r9,rdx
; Reduce: low 130 bits plus 5 * (bits above 2^130).
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
; --- first half round continued: a += b; d ^= a; d <<<= 8 ---
vpaddd ymm2,ymm2,ymm6
vpaddd ymm1,ymm1,ymm5
vpaddd ymm0,ymm0,ymm4
vpxor ymm15,ymm15,ymm3
vpxor ymm14,ymm14,ymm2
vpxor ymm13,ymm13,ymm1
vpxor ymm12,ymm12,ymm0
vpshufb ymm15,ymm15,ymm8
vpshufb ymm14,ymm14,ymm8
vpshufb ymm13,ymm13,ymm8
vpshufb ymm12,ymm12,ymm8
; c += d; b ^= c; b <<<= 7
vpaddd ymm11,ymm11,ymm15
vpaddd ymm10,ymm10,ymm14
vpaddd ymm9,ymm9,ymm13
vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp]
vpxor ymm7,ymm7,ymm11
vpxor ymm6,ymm6,ymm10
vpxor ymm5,ymm5,ymm9
vpxor ymm4,ymm4,ymm8
vmovdqa YMMWORD[(160+128)+rbp],ymm8
vpsrld ymm8,ymm7,25
vpslld ymm7,ymm7,32-25
vpxor ymm7,ymm7,ymm8
vpsrld ymm8,ymm6,25
vpslld ymm6,ymm6,32-25
vpxor ymm6,ymm6,ymm8
vpsrld ymm8,ymm5,25
vpslld ymm5,ymm5,32-25
vpxor ymm5,ymm5,ymm8
vpsrld ymm8,ymm4,25
vpslld ymm4,ymm4,32-25
vpxor ymm4,ymm4,ymm8
; Reload row c, then rotate rows b, c, d within each 128-bit lane by
; 4, 8 and 12 bytes for the second half round.
vmovdqa ymm8,YMMWORD[((160+128))+rbp]
vpalignr ymm7,ymm7,ymm7,4
vpalignr ymm11,ymm11,ymm11,8
vpalignr ymm15,ymm15,ymm15,12
vpalignr ymm6,ymm6,ymm6,4
vpalignr ymm10,ymm10,ymm10,8
vpalignr ymm14,ymm14,ymm14,12
vpalignr ymm5,ymm5,ymm5,4
vpalignr ymm9,ymm9,ymm9,8
vpalignr ymm13,ymm13,ymm13,12
vpalignr ymm4,ymm4,ymm4,4
vpalignr ymm8,ymm8,ymm8,8
vpalignr ymm12,ymm12,ymm12,12
; --- second half round: a += b; d ^= a; d <<<= 16 ---
vmovdqa YMMWORD[(160+128)+rbp],ymm8
vmovdqa ymm8,YMMWORD[$L$rol16]
vpaddd ymm3,ymm3,ymm7
; --- Poly1305: absorb the 16-byte block at [r8+16] ---
add r10,QWORD[((0+16))+r8]
adc r11,QWORD[((8+16))+r8]
adc r12,1
mov rdx,QWORD[((0+160+0))+rbp]
mov r15,rdx
mulx r14,r13,r10
mulx rdx,rax,r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rdx,QWORD[((8+160+0))+rbp]
mulx rax,r10,r10
add r14,r10
mulx r9,r11,r11
adc r15,r11
adc r9,0
imul rdx,r12
add r15,rax
adc r9,rdx
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
; Both blocks of this iteration have been hashed.
lea r8,[32+r8]
vpaddd ymm2,ymm2,ymm6
vpaddd ymm1,ymm1,ymm5
vpaddd ymm0,ymm0,ymm4
vpxor ymm15,ymm15,ymm3
vpxor ymm14,ymm14,ymm2
vpxor ymm13,ymm13,ymm1
vpxor ymm12,ymm12,ymm0
vpshufb ymm15,ymm15,ymm8
vpshufb ymm14,ymm14,ymm8
vpshufb ymm13,ymm13,ymm8
vpshufb ymm12,ymm12,ymm8
; c += d; b ^= c; b <<<= 12
vpaddd ymm11,ymm11,ymm15
vpaddd ymm10,ymm10,ymm14
vpaddd ymm9,ymm9,ymm13
vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp]
vpxor ymm7,ymm7,ymm11
vpxor ymm6,ymm6,ymm10
vpxor ymm5,ymm5,ymm9
vpxor ymm4,ymm4,ymm8
vmovdqa YMMWORD[(160+128)+rbp],ymm8
vpsrld ymm8,ymm7,20
vpslld ymm7,ymm7,32-20
vpxor ymm7,ymm7,ymm8
vpsrld ymm8,ymm6,20
vpslld ymm6,ymm6,32-20
vpxor ymm6,ymm6,ymm8
vpsrld ymm8,ymm5,20
vpslld ymm5,ymm5,32-20
vpxor ymm5,ymm5,ymm8
vpsrld ymm8,ymm4,20
vpslld ymm4,ymm4,32-20
vpxor ymm4,ymm4,ymm8
; a += b; d ^= a; d <<<= 8
vmovdqa ymm8,YMMWORD[$L$rol8]
vpaddd ymm3,ymm3,ymm7
vpaddd ymm2,ymm2,ymm6
vpaddd ymm1,ymm1,ymm5
vpaddd ymm0,ymm0,ymm4
vpxor ymm15,ymm15,ymm3
vpxor ymm14,ymm14,ymm2
vpxor ymm13,ymm13,ymm1
vpxor ymm12,ymm12,ymm0
vpshufb ymm15,ymm15,ymm8
vpshufb ymm14,ymm14,ymm8
vpshufb ymm13,ymm13,ymm8
vpshufb ymm12,ymm12,ymm8
; c += d; b ^= c; b <<<= 7
vpaddd ymm11,ymm11,ymm15
vpaddd ymm10,ymm10,ymm14
vpaddd ymm9,ymm9,ymm13
vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp]
vpxor ymm7,ymm7,ymm11
vpxor ymm6,ymm6,ymm10
vpxor ymm5,ymm5,ymm9
vpxor ymm4,ymm4,ymm8
vmovdqa YMMWORD[(160+128)+rbp],ymm8
vpsrld ymm8,ymm7,25
vpslld ymm7,ymm7,32-25
vpxor ymm7,ymm7,ymm8
vpsrld ymm8,ymm6,25
vpslld ymm6,ymm6,32-25
vpxor ymm6,ymm6,ymm8
vpsrld ymm8,ymm5,25
vpslld ymm5,ymm5,32-25
vpxor ymm5,ymm5,ymm8
vpsrld ymm8,ymm4,25
vpslld ymm4,ymm4,32-25
vpxor ymm4,ymm4,ymm8
; Reload row c and undo the lane rotation (12/8/4 complements 4/8/12).
vmovdqa ymm8,YMMWORD[((160+128))+rbp]
vpalignr ymm7,ymm7,ymm7,12
vpalignr ymm11,ymm11,ymm11,8
vpalignr ymm15,ymm15,ymm15,4
vpalignr ymm6,ymm6,ymm6,12
vpalignr ymm10,ymm10,ymm10,8
vpalignr ymm14,ymm14,ymm14,4
vpalignr ymm5,ymm5,ymm5,12
vpalignr ymm9,ymm9,ymm9,8
vpalignr ymm13,ymm13,ymm13,4
vpalignr ymm4,ymm4,ymm4,12
vpalignr ymm8,ymm8,ymm8,8
vpalignr ymm12,ymm12,ymm12,4
; Loop control: the first four iterations re-enter through the x2hash label
; (one extra hash block each); ten iterations are run in total.
inc rcx
cmp rcx,4
jl NEAR $L$open_avx2_tail_512_rounds_and_x2hash
cmp rcx,10
jne NEAR $L$open_avx2_tail_512_rounds_and_x1hash
; rcx = (rbx - 384) rounded down to a multiple of 16: byte count for the
; catch-up hash loop that follows.
mov rcx,rbx
sub rcx,12*32
and rcx,-16
; Poly1305 catch-up loop for the 512-byte tail path.
;   r8  = pointer to the next 16-byte block to hash
;   rcx = bytes still to hash (a multiple of 16, counted down to zero)
; Accumulator is r10:r11:r12; multiplier qwords are at [160+rbp], [168+rbp].
$L$open_avx2_tail_512_hash:
test rcx,rcx
je NEAR $L$open_avx2_tail_512_done
; acc += block, plus 1 in the third limb (bit 128).
add r10,QWORD[((0+0))+r8]
adc r11,QWORD[((8+0))+r8]
adc r12,1
; acc * low multiplier qword -> r13:r14:r15.
mov rdx,QWORD[((0+160+0))+rbp]
mov r15,rdx
mulx r14,r13,r10
mulx rdx,rax,r11
imul r15,r12
add r14,rax
adc r15,rdx
; acc * high multiplier qword, accumulated into r14:r15:r9.
mov rdx,QWORD[((8+160+0))+rbp]
mulx rax,r10,r10
add r14,r10
mulx r9,r11,r11
adc r15,r11
adc r9,0
imul rdx,r12
add r15,rax
adc r9,rdx
; Reduce: low 130 bits plus 5 * (bits above 2^130).
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
; 16 bytes hashed.
lea r8,[16+r8]
sub rcx,2*8
jmp NEAR $L$open_avx2_tail_512_hash
; Finalise the four 2-block states of the 512-byte tail path, XOR the first
; 384 bytes at rsi into rdi and leave the last 128 bytes of keystream in
; ymm0, ymm4, ymm8, ymm12. Execution then falls through into
; $L$open_avx2_tail_128_xor.
$L$open_avx2_tail_512_done:
; Feed-forward: add the saved input rows. Counter rows come from
; [160+256+rbp], [160+224+rbp], [160+192+rbp] and [160+160+rbp], matching
; the stores made in the tail_512 setup.
vpaddd ymm3,ymm3,YMMWORD[$L$chacha20_consts]
vpaddd ymm7,ymm7,YMMWORD[((160+64))+rbp]
vpaddd ymm11,ymm11,YMMWORD[((160+96))+rbp]
vpaddd ymm15,ymm15,YMMWORD[((160+256))+rbp]
vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts]
vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp]
vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp]
vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp]
vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts]
vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp]
vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp]
vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp]
vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts]
vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp]
vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp]
vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp]
; No ymm register is free, so ymm0 is parked at [160+128+rbp] and used as
; the temporary while the first state is written out.
vmovdqa YMMWORD[(160+128)+rbp],ymm0
; State 3 -> bytes 0..127. Selector 0x02 pairs the low 128-bit lanes,
; 0x13 pairs the high lanes.
vperm2i128 ymm0,ymm7,ymm3,0x02
vperm2i128 ymm7,ymm7,ymm3,0x13
vperm2i128 ymm3,ymm15,ymm11,0x02
vperm2i128 ymm11,ymm15,ymm11,0x13
vpxor ymm0,ymm0,YMMWORD[((0+0))+rsi]
vpxor ymm3,ymm3,YMMWORD[((32+0))+rsi]
vpxor ymm7,ymm7,YMMWORD[((64+0))+rsi]
vpxor ymm11,ymm11,YMMWORD[((96+0))+rsi]
vmovdqu YMMWORD[(0+0)+rdi],ymm0
vmovdqu YMMWORD[(32+0)+rdi],ymm3
vmovdqu YMMWORD[(64+0)+rdi],ymm7
vmovdqu YMMWORD[(96+0)+rdi],ymm11
; Restore ymm0; ymm3 is free from here on and serves as the temporary.
vmovdqa ymm0,YMMWORD[((160+128))+rbp]
; State 2 -> bytes 128..255.
vperm2i128 ymm3,ymm6,ymm2,0x02
vperm2i128 ymm6,ymm6,ymm2,0x13
vperm2i128 ymm2,ymm14,ymm10,0x02
vperm2i128 ymm10,ymm14,ymm10,0x13
vpxor ymm3,ymm3,YMMWORD[((0+128))+rsi]
vpxor ymm2,ymm2,YMMWORD[((32+128))+rsi]
vpxor ymm6,ymm6,YMMWORD[((64+128))+rsi]
vpxor ymm10,ymm10,YMMWORD[((96+128))+rsi]
vmovdqu YMMWORD[(0+128)+rdi],ymm3
vmovdqu YMMWORD[(32+128)+rdi],ymm2
vmovdqu YMMWORD[(64+128)+rdi],ymm6
vmovdqu YMMWORD[(96+128)+rdi],ymm10
; State 1 -> bytes 256..383.
vperm2i128 ymm3,ymm5,ymm1,0x02
vperm2i128 ymm5,ymm5,ymm1,0x13
vperm2i128 ymm1,ymm13,ymm9,0x02
vperm2i128 ymm9,ymm13,ymm9,0x13
vpxor ymm3,ymm3,YMMWORD[((0+256))+rsi]
vpxor ymm1,ymm1,YMMWORD[((32+256))+rsi]
vpxor ymm5,ymm5,YMMWORD[((64+256))+rsi]
vpxor ymm9,ymm9,YMMWORD[((96+256))+rsi]
vmovdqu YMMWORD[(0+256)+rdi],ymm3
vmovdqu YMMWORD[(32+256)+rdi],ymm1
vmovdqu YMMWORD[(64+256)+rdi],ymm5
vmovdqu YMMWORD[(96+256)+rdi],ymm9
; State 0 -> keystream queue ymm0, ymm4, ymm8, ymm12.
vperm2i128 ymm3,ymm4,ymm0,0x13
vperm2i128 ymm0,ymm4,ymm0,0x02
vperm2i128 ymm4,ymm12,ymm8,0x02
vperm2i128 ymm12,ymm12,ymm8,0x13
vmovdqa ymm8,ymm3
; 384 bytes consumed.
lea rsi,[384+rsi]
lea rdi,[384+rdi]
sub rbx,12*32
; XOR loop over the final keystream queue ymm0, ymm4, ymm8, ymm12.
; While at least 32 bytes remain (rbx), XOR 32 bytes from rsi with ymm0,
; store them to rdi and shift the queue down by one register.
$L$open_avx2_tail_128_xor:
cmp rbx,32
jb NEAR $L$open_avx2_tail_32_xor
sub rbx,32
vpxor ymm0,ymm0,YMMWORD[rsi]
vmovdqu YMMWORD[rdi],ymm0
lea rsi,[32+rsi]
lea rdi,[32+rdi]
; Queue shift: ymm0 <- ymm4 <- ymm8 <- ymm12.
vmovdqa ymm0,ymm4
vmovdqa ymm4,ymm8
vmovdqa ymm8,ymm12
jmp NEAR $L$open_avx2_tail_128_xor
; Fewer than 32 bytes remain. Process one more 16-byte chunk if possible and
; leave the next 16 bytes of keystream in xmm1 for the code at
; $L$open_sse_tail_16 (reached through $L$open_avx2_exit).
$L$open_avx2_tail_32_xor:
cmp rbx,16
; vmovdqa does not modify flags, so the jb below still tests the cmp above.
vmovdqa xmm1,xmm0
jb NEAR $L$open_avx2_exit
sub rbx,16
vpxor xmm1,xmm0,XMMWORD[rsi]
vmovdqu XMMWORD[rdi],xmm1
lea rsi,[16+rsi]
lea rdi,[16+rdi]
; Move the high 128-bit lane of ymm0 down and hand it over in xmm1.
vperm2i128 ymm0,ymm0,ymm0,0x11
vmovdqa xmm1,xmm0
; Leave the AVX2 code: clear the upper ymm halves, then continue at
; $L$open_sse_tail_16 (defined outside this view).
$L$open_avx2_exit:
vzeroupper
jmp NEAR $L$open_sse_tail_16
; Short-input path using two 2-block states (ymm0/4/8/12 and ymm1/5/9/13).
; On entry ymm0, ymm4, ymm8, ymm12 hold the initial state rows.
$L$open_avx2_192:
; Second working copy in ymm1/5/9; ymm2, ymm6, ymm10 keep the untouched
; rows a/b/c for the feed-forward addition after the rounds.
vmovdqa ymm1,ymm0
vmovdqa ymm2,ymm0
vmovdqa ymm5,ymm4
vmovdqa ymm6,ymm4
vmovdqa ymm9,ymm8
vmovdqa ymm10,ymm8
; Second state's counter row = first + $L$avx2_inc; ymm11 and ymm15 keep
; the untouched counter rows.
vpaddd ymm13,ymm12,YMMWORD[$L$avx2_inc]
vmovdqa ymm11,ymm12
vmovdqa ymm15,ymm13
; r10 = number of double rounds.
mov r10,10
; Ten ChaCha20 double rounds over two 2-block states, then key derivation
; and keystream layout for $L$open_avx2_short.
; States (rows a,b,c,d): (ymm0,ymm4,ymm8,ymm12) and (ymm1,ymm5,ymm9,ymm13);
; ymm3 is the scratch register for the rotations.
$L$open_avx2_192_rounds:
; --- first half round, state 0 ---
; a += b; d ^= a; d <<<= 16; c += d; b ^= c; b <<<= 12
vpaddd ymm0,ymm0,ymm4
vpxor ymm12,ymm12,ymm0
vpshufb ymm12,ymm12,YMMWORD[$L$rol16]
vpaddd ymm8,ymm8,ymm12
vpxor ymm4,ymm4,ymm8
vpsrld ymm3,ymm4,20
vpslld ymm4,ymm4,12
vpxor ymm4,ymm4,ymm3
; a += b; d ^= a; d <<<= 8; c += d; b ^= c; b <<<= 7
vpaddd ymm0,ymm0,ymm4
vpxor ymm12,ymm12,ymm0
vpshufb ymm12,ymm12,YMMWORD[$L$rol8]
vpaddd ymm8,ymm8,ymm12
vpxor ymm4,ymm4,ymm8
vpslld ymm3,ymm4,7
vpsrld ymm4,ymm4,25
vpxor ymm4,ymm4,ymm3
; Rotate rows d, c, b within each 128-bit lane by 12, 8 and 4 bytes.
vpalignr ymm12,ymm12,ymm12,12
vpalignr ymm8,ymm8,ymm8,8
vpalignr ymm4,ymm4,ymm4,4
; --- first half round, state 1 ---
vpaddd ymm1,ymm1,ymm5
vpxor ymm13,ymm13,ymm1
vpshufb ymm13,ymm13,YMMWORD[$L$rol16]
vpaddd ymm9,ymm9,ymm13
vpxor ymm5,ymm5,ymm9
vpsrld ymm3,ymm5,20
vpslld ymm5,ymm5,12
vpxor ymm5,ymm5,ymm3
vpaddd ymm1,ymm1,ymm5
vpxor ymm13,ymm13,ymm1
vpshufb ymm13,ymm13,YMMWORD[$L$rol8]
vpaddd ymm9,ymm9,ymm13
vpxor ymm5,ymm5,ymm9
vpslld ymm3,ymm5,7
vpsrld ymm5,ymm5,25
vpxor ymm5,ymm5,ymm3
vpalignr ymm13,ymm13,ymm13,12
vpalignr ymm9,ymm9,ymm9,8
vpalignr ymm5,ymm5,ymm5,4
; --- second half round, state 0 ---
vpaddd ymm0,ymm0,ymm4
vpxor ymm12,ymm12,ymm0
vpshufb ymm12,ymm12,YMMWORD[$L$rol16]
vpaddd ymm8,ymm8,ymm12
vpxor ymm4,ymm4,ymm8
vpsrld ymm3,ymm4,20
vpslld ymm4,ymm4,12
vpxor ymm4,ymm4,ymm3
vpaddd ymm0,ymm0,ymm4
vpxor ymm12,ymm12,ymm0
vpshufb ymm12,ymm12,YMMWORD[$L$rol8]
vpaddd ymm8,ymm8,ymm12
vpxor ymm4,ymm4,ymm8
vpslld ymm3,ymm4,7
vpsrld ymm4,ymm4,25
vpxor ymm4,ymm4,ymm3
; Undo the lane rotation (4/8/12 complements 12/8/4).
vpalignr ymm12,ymm12,ymm12,4
vpalignr ymm8,ymm8,ymm8,8
vpalignr ymm4,ymm4,ymm4,12
; --- second half round, state 1 ---
vpaddd ymm1,ymm1,ymm5
vpxor ymm13,ymm13,ymm1
vpshufb ymm13,ymm13,YMMWORD[$L$rol16]
vpaddd ymm9,ymm9,ymm13
vpxor ymm5,ymm5,ymm9
vpsrld ymm3,ymm5,20
vpslld ymm5,ymm5,12
vpxor ymm5,ymm5,ymm3
vpaddd ymm1,ymm1,ymm5
vpxor ymm13,ymm13,ymm1
vpshufb ymm13,ymm13,YMMWORD[$L$rol8]
vpaddd ymm9,ymm9,ymm13
vpxor ymm5,ymm5,ymm9
vpslld ymm3,ymm5,7
vpsrld ymm5,ymm5,25
vpxor ymm5,ymm5,ymm3
vpalignr ymm13,ymm13,ymm13,4
vpalignr ymm9,ymm9,ymm9,8
vpalignr ymm5,ymm5,ymm5,12
dec r10
jne NEAR $L$open_avx2_192_rounds
; Feed-forward: add the untouched input rows kept in ymm2, ymm6, ymm10
; (rows a/b/c) and ymm11, ymm15 (the two counter rows).
vpaddd ymm0,ymm0,ymm2
vpaddd ymm1,ymm1,ymm2
vpaddd ymm4,ymm4,ymm6
vpaddd ymm5,ymm5,ymm6
vpaddd ymm8,ymm8,ymm10
vpaddd ymm9,ymm9,ymm10
vpaddd ymm12,ymm12,ymm11
vpaddd ymm13,ymm13,ymm15
; The low lanes of rows a and b of state 0 (32 bytes) are masked with
; $L$clamp and stored at [160+rbp]: this is the multiplier/key area read by
; the Poly1305 code. The mask's upper 16 bytes are all ones, so only the
; first 16 bytes are actually clamped.
vperm2i128 ymm3,ymm4,ymm0,0x02
vpand ymm3,ymm3,YMMWORD[$L$clamp]
vmovdqa YMMWORD[(160+0)+rbp],ymm3
; Lay out the remaining keystream as a queue ymm0, ymm4, ymm8, ymm12, ymm1,
; ymm5. The low lanes of rows c and d of state 0 are not used.
vperm2i128 ymm0,ymm4,ymm0,0x13
vperm2i128 ymm4,ymm12,ymm8,0x13
vperm2i128 ymm8,ymm5,ymm1,0x02
vperm2i128 ymm12,ymm13,ymm9,0x02
vperm2i128 ymm1,ymm5,ymm1,0x13
vperm2i128 ymm5,ymm13,ymm9,0x13
; Common continuation of the 192- and 320-byte paths: hash the additional
; data via poly_hash_ad_internal (defined outside this view), then fall
; through into the hash-and-XOR loop.
$L$open_avx2_short:
; Register-to-itself move with no architectural effect; presumably an
; artefact of the generator assigning both operands to r8.
mov r8,r8
call poly_hash_ad_internal
; Main loop of the short path. While at least 32 bytes remain (rbx):
; hash the two 16-byte blocks at [rsi] and [rsi+16], XOR those 32 bytes
; with ymm0, store them to rdi and shift the keystream queue.
; Accumulator is r10:r11:r12; multiplier qwords are at [160+rbp], [168+rbp].
$L$open_avx2_short_hash_and_xor_loop:
cmp rbx,32
jb NEAR $L$open_avx2_short_tail_32
sub rbx,32
; --- Poly1305 block at [rsi] ---
; acc += block, plus 1 in the third limb (bit 128).
add r10,QWORD[((0+0))+rsi]
adc r11,QWORD[((8+0))+rsi]
adc r12,1
; acc * low multiplier qword -> r13:r14:r15.
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
; acc * high multiplier qword, accumulated into r14:r15:r9.
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
imul r9,r12
add r15,r10
adc r9,rdx
; Reduce: low 130 bits plus 5 * (bits above 2^130).
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
; --- Poly1305 block at [rsi+16] (same sequence) ---
add r10,QWORD[((0+16))+rsi]
adc r11,QWORD[((8+16))+rsi]
adc r12,1
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
imul r9,r12
add r15,r10
adc r9,rdx
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
; XOR 32 bytes with the head of the keystream queue and store them.
vpxor ymm0,ymm0,YMMWORD[rsi]
vmovdqu YMMWORD[rdi],ymm0
lea rsi,[32+rsi]
lea rdi,[32+rdi]
; Queue shift: ymm0 <- ymm4 <- ymm8 <- ymm12 <- ymm1 <- ymm5 <- ymm9
;              <- ymm13 <- ymm2 <- ymm6.
vmovdqa ymm0,ymm4
vmovdqa ymm4,ymm8
vmovdqa ymm8,ymm12
vmovdqa ymm12,ymm1
vmovdqa ymm1,ymm5
vmovdqa ymm5,ymm9
vmovdqa ymm9,ymm13
vmovdqa ymm13,ymm2
vmovdqa ymm2,ymm6
jmp NEAR $L$open_avx2_short_hash_and_xor_loop
; Fewer than 32 bytes remain on the short path. If at least 16 remain, hash
; and XOR one more 16-byte block. In either case xmm1 ends up holding the
; next 16 bytes of keystream for $L$open_sse_tail_16.
$L$open_avx2_short_tail_32:
cmp rbx,16
; vmovdqa does not modify flags, so the jb below still tests the cmp above.
vmovdqa xmm1,xmm0
jb NEAR $L$open_avx2_short_tail_32_exit
sub rbx,16
; --- Poly1305 block at [rsi] ---
; acc (r10:r11:r12) += block, plus 1 in the third limb (bit 128).
add r10,QWORD[((0+0))+rsi]
adc r11,QWORD[((8+0))+rsi]
adc r12,1
; acc * low multiplier qword at [160+rbp] -> r13:r14:r15.
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
; acc * high multiplier qword at [168+rbp], accumulated into r14:r15:r9.
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
imul r9,r12
add r15,r10
adc r9,rdx
; Reduce: low 130 bits plus 5 * (bits above 2^130).
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
; XOR 16 bytes with the low lane of ymm0 and store them.
vpxor xmm3,xmm0,XMMWORD[rsi]
vmovdqu XMMWORD[rdi],xmm3
lea rsi,[16+rsi]
lea rdi,[16+rdi]
; Hand the high lane of ymm0 over in xmm1.
vextracti128 xmm1,ymm0,1
; Leave the AVX2 short path: clear the upper ymm halves, then continue at
; $L$open_sse_tail_16 (defined outside this view).
$L$open_avx2_short_tail_32_exit:
vzeroupper
jmp NEAR $L$open_sse_tail_16
; Short-input path using three 2-block states (ymm0/4/8/12, ymm1/5/9/13,
; ymm2/6/10/14). On entry ymm0, ymm4, ymm8, ymm12 hold the initial rows.
$L$open_avx2_320:
; Two further working copies of rows a/b/c.
vmovdqa ymm1,ymm0
vmovdqa ymm2,ymm0
vmovdqa ymm5,ymm4
vmovdqa ymm6,ymm4
vmovdqa ymm9,ymm8
vmovdqa ymm10,ymm8
; Counter rows for states 1 and 2: each is the previous one + $L$avx2_inc.
vpaddd ymm13,ymm12,YMMWORD[$L$avx2_inc]
vpaddd ymm14,ymm13,YMMWORD[$L$avx2_inc]
; ymm7 and ymm11 keep the untouched rows b and c for the feed-forward;
; the three counter rows are saved on the stack frame for the same purpose.
vmovdqa ymm7,ymm4
vmovdqa ymm11,ymm8
vmovdqa YMMWORD[(160+160)+rbp],ymm12
vmovdqa YMMWORD[(160+192)+rbp],ymm13
vmovdqa YMMWORD[(160+224)+rbp],ymm14
; r10 = number of double rounds.
mov r10,10
; Ten ChaCha20 double rounds over three 2-block states, then key derivation
; and keystream layout before jumping to $L$open_avx2_short.
; States (rows a,b,c,d): (ymm0,ymm4,ymm8,ymm12), (ymm1,ymm5,ymm9,ymm13),
; (ymm2,ymm6,ymm10,ymm14); ymm3 is the scratch register for the rotations.
$L$open_avx2_320_rounds:
; --- first half round, state 0 ---
; a += b; d ^= a; d <<<= 16; c += d; b ^= c; b <<<= 12
vpaddd ymm0,ymm0,ymm4
vpxor ymm12,ymm12,ymm0
vpshufb ymm12,ymm12,YMMWORD[$L$rol16]
vpaddd ymm8,ymm8,ymm12
vpxor ymm4,ymm4,ymm8
vpsrld ymm3,ymm4,20
vpslld ymm4,ymm4,12
vpxor ymm4,ymm4,ymm3
; a += b; d ^= a; d <<<= 8; c += d; b ^= c; b <<<= 7
vpaddd ymm0,ymm0,ymm4
vpxor ymm12,ymm12,ymm0
vpshufb ymm12,ymm12,YMMWORD[$L$rol8]
vpaddd ymm8,ymm8,ymm12
vpxor ymm4,ymm4,ymm8
vpslld ymm3,ymm4,7
vpsrld ymm4,ymm4,25
vpxor ymm4,ymm4,ymm3
; Rotate rows d, c, b within each 128-bit lane by 12, 8 and 4 bytes.
vpalignr ymm12,ymm12,ymm12,12
vpalignr ymm8,ymm8,ymm8,8
vpalignr ymm4,ymm4,ymm4,4
; --- first half round, state 1 ---
vpaddd ymm1,ymm1,ymm5
vpxor ymm13,ymm13,ymm1
vpshufb ymm13,ymm13,YMMWORD[$L$rol16]
vpaddd ymm9,ymm9,ymm13
vpxor ymm5,ymm5,ymm9
vpsrld ymm3,ymm5,20
vpslld ymm5,ymm5,12
vpxor ymm5,ymm5,ymm3
vpaddd ymm1,ymm1,ymm5
vpxor ymm13,ymm13,ymm1
vpshufb ymm13,ymm13,YMMWORD[$L$rol8]
vpaddd ymm9,ymm9,ymm13
vpxor ymm5,ymm5,ymm9
vpslld ymm3,ymm5,7
vpsrld ymm5,ymm5,25
vpxor ymm5,ymm5,ymm3
vpalignr ymm13,ymm13,ymm13,12
vpalignr ymm9,ymm9,ymm9,8
vpalignr ymm5,ymm5,ymm5,4
; --- first half round, state 2 ---
vpaddd ymm2,ymm2,ymm6
vpxor ymm14,ymm14,ymm2
vpshufb ymm14,ymm14,YMMWORD[$L$rol16]
vpaddd ymm10,ymm10,ymm14
vpxor ymm6,ymm6,ymm10
vpsrld ymm3,ymm6,20
vpslld ymm6,ymm6,12
vpxor ymm6,ymm6,ymm3
vpaddd ymm2,ymm2,ymm6
vpxor ymm14,ymm14,ymm2
vpshufb ymm14,ymm14,YMMWORD[$L$rol8]
vpaddd ymm10,ymm10,ymm14
vpxor ymm6,ymm6,ymm10
vpslld ymm3,ymm6,7
vpsrld ymm6,ymm6,25
vpxor ymm6,ymm6,ymm3
vpalignr ymm14,ymm14,ymm14,12
vpalignr ymm10,ymm10,ymm10,8
vpalignr ymm6,ymm6,ymm6,4
; --- second half round, state 0 ---
vpaddd ymm0,ymm0,ymm4
vpxor ymm12,ymm12,ymm0
vpshufb ymm12,ymm12,YMMWORD[$L$rol16]
vpaddd ymm8,ymm8,ymm12
vpxor ymm4,ymm4,ymm8
vpsrld ymm3,ymm4,20
vpslld ymm4,ymm4,12
vpxor ymm4,ymm4,ymm3
vpaddd ymm0,ymm0,ymm4
vpxor ymm12,ymm12,ymm0
vpshufb ymm12,ymm12,YMMWORD[$L$rol8]
vpaddd ymm8,ymm8,ymm12
vpxor ymm4,ymm4,ymm8
vpslld ymm3,ymm4,7
vpsrld ymm4,ymm4,25
vpxor ymm4,ymm4,ymm3
; Undo the lane rotation (4/8/12 complements 12/8/4).
vpalignr ymm12,ymm12,ymm12,4
vpalignr ymm8,ymm8,ymm8,8
vpalignr ymm4,ymm4,ymm4,12
; --- second half round, state 1 ---
vpaddd ymm1,ymm1,ymm5
vpxor ymm13,ymm13,ymm1
vpshufb ymm13,ymm13,YMMWORD[$L$rol16]
vpaddd ymm9,ymm9,ymm13
vpxor ymm5,ymm5,ymm9
vpsrld ymm3,ymm5,20
vpslld ymm5,ymm5,12
vpxor ymm5,ymm5,ymm3
vpaddd ymm1,ymm1,ymm5
vpxor ymm13,ymm13,ymm1
vpshufb ymm13,ymm13,YMMWORD[$L$rol8]
vpaddd ymm9,ymm9,ymm13
vpxor ymm5,ymm5,ymm9
vpslld ymm3,ymm5,7
vpsrld ymm5,ymm5,25
vpxor ymm5,ymm5,ymm3
vpalignr ymm13,ymm13,ymm13,4
vpalignr ymm9,ymm9,ymm9,8
vpalignr ymm5,ymm5,ymm5,12
; --- second half round, state 2 ---
vpaddd ymm2,ymm2,ymm6
vpxor ymm14,ymm14,ymm2
vpshufb ymm14,ymm14,YMMWORD[$L$rol16]
vpaddd ymm10,ymm10,ymm14
vpxor ymm6,ymm6,ymm10
vpsrld ymm3,ymm6,20
vpslld ymm6,ymm6,12
vpxor ymm6,ymm6,ymm3
vpaddd ymm2,ymm2,ymm6
vpxor ymm14,ymm14,ymm2
vpshufb ymm14,ymm14,YMMWORD[$L$rol8]
vpaddd ymm10,ymm10,ymm14
vpxor ymm6,ymm6,ymm10
vpslld ymm3,ymm6,7
vpsrld ymm6,ymm6,25
vpxor ymm6,ymm6,ymm3
vpalignr ymm14,ymm14,ymm14,4
vpalignr ymm10,ymm10,ymm10,8
vpalignr ymm6,ymm6,ymm6,12
dec r10
jne NEAR $L$open_avx2_320_rounds
; Feed-forward: constants, the untouched rows b/c kept in ymm7/ymm11, and
; the three counter rows saved on the stack frame by the setup code.
vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts]
vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts]
vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts]
vpaddd ymm4,ymm4,ymm7
vpaddd ymm5,ymm5,ymm7
vpaddd ymm6,ymm6,ymm7
vpaddd ymm8,ymm8,ymm11
vpaddd ymm9,ymm9,ymm11
vpaddd ymm10,ymm10,ymm11
vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp]
vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp]
vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp]
; The low lanes of rows a and b of state 0 (32 bytes) are masked with
; $L$clamp and stored at [160+rbp], the multiplier/key area read by the
; Poly1305 code.
vperm2i128 ymm3,ymm4,ymm0,0x02
vpand ymm3,ymm3,YMMWORD[$L$clamp]
vmovdqa YMMWORD[(160+0)+rbp],ymm3
; Lay out the remaining keystream in queue order: ymm0, ymm4, ymm8, ymm12,
; ymm1, ymm5, ymm9, ymm13, ymm2, ymm6. The low lanes of rows c and d of
; state 0 are not used.
vperm2i128 ymm0,ymm4,ymm0,0x13
vperm2i128 ymm4,ymm12,ymm8,0x13
vperm2i128 ymm8,ymm5,ymm1,0x02
vperm2i128 ymm12,ymm13,ymm9,0x02
vperm2i128 ymm1,ymm5,ymm1,0x13
vperm2i128 ymm5,ymm13,ymm9,0x13
vperm2i128 ymm9,ymm6,ymm2,0x02
vperm2i128 ymm13,ymm14,ymm10,0x02
vperm2i128 ymm2,ymm6,ymm2,0x13
vperm2i128 ymm6,ymm14,ymm10,0x13
jmp NEAR $L$open_avx2_short
ALIGN 64
; AVX2 seal entry point (setup portion).
; Registers read here: r9 = pointer to 48 bytes of initial state rows
; (three 16-byte rows), rbx = input length in bytes, rbp = base of the
; scratch frame (saved rows live at rbp+160 and above).
; NOTE(review): the code that establishes rbx/rbp/r9 is outside this view;
; confirm the contract against the caller.
chacha20_poly1305_seal_avx2:
vzeroupper
; Row a = constants; rows b, c, d = the three 16-byte rows at r9, each
; duplicated into both 128-bit lanes.
vmovdqa ymm0,YMMWORD[$L$chacha20_consts]
vbroadcasti128 ymm4,XMMWORD[r9]
vbroadcasti128 ymm8,XMMWORD[16+r9]
vbroadcasti128 ymm12,XMMWORD[32+r9]
; The 32 bytes at $L$avx2_init are {0,0,0,0, 1,0,0,0} as dwords, so the high
; lane's first dword becomes the low lane's value + 1.
vpaddd ymm12,ymm12,YMMWORD[$L$avx2_init]
; Inputs of at most 192 / 320 bytes take dedicated short paths.
cmp rbx,6*32
jbe NEAR $L$seal_avx2_192
cmp rbx,10*32
jbe NEAR $L$seal_avx2_320
; Long input: four 2-block states. Rows b and c are also saved at
; [160+64+rbp] and [160+96+rbp] for later reloads.
vmovdqa ymm1,ymm0
vmovdqa ymm2,ymm0
vmovdqa ymm3,ymm0
vmovdqa ymm5,ymm4
vmovdqa ymm6,ymm4
vmovdqa ymm7,ymm4
vmovdqa YMMWORD[(160+64)+rbp],ymm4
vmovdqa ymm9,ymm8
vmovdqa ymm10,ymm8
vmovdqa ymm11,ymm8
vmovdqa YMMWORD[(160+96)+rbp],ymm8
; Counter rows: ymm15 = base, then ymm14, ymm13, ymm12 each one further
; $L$avx2_inc (2 per lane). All four are saved for the feed-forward.
vmovdqa ymm15,ymm12
vpaddd ymm14,ymm15,YMMWORD[$L$avx2_inc]
vpaddd ymm13,ymm14,YMMWORD[$L$avx2_inc]
vpaddd ymm12,ymm13,YMMWORD[$L$avx2_inc]
vmovdqa YMMWORD[(160+160)+rbp],ymm12
vmovdqa YMMWORD[(160+192)+rbp],ymm13
vmovdqa YMMWORD[(160+224)+rbp],ymm14
vmovdqa YMMWORD[(160+256)+rbp],ymm15
; r10 = number of double rounds.
mov r10,10
; Ten ChaCha20 double rounds over four 2-block states; no hashing is
; interleaved here.
; States (rows a,b,c,d): (ymm3,ymm7,ymm11,ymm15), (ymm2,ymm6,ymm10,ymm14),
; (ymm1,ymm5,ymm9,ymm13), (ymm0,ymm4,ymm8,ymm12).
; All sixteen ymm registers hold state, so row c of the last state (ymm8) is
; spilled to [160+128+rbp] whenever ymm8 is needed as a shuffle mask or as a
; shift temporary.
$L$seal_avx2_init_rounds:
; --- first half round: a += b; d ^= a; d <<<= 16 ---
vmovdqa YMMWORD[(160+128)+rbp],ymm8
vmovdqa ymm8,YMMWORD[$L$rol16]
vpaddd ymm3,ymm3,ymm7
vpaddd ymm2,ymm2,ymm6
vpaddd ymm1,ymm1,ymm5
vpaddd ymm0,ymm0,ymm4
vpxor ymm15,ymm15,ymm3
vpxor ymm14,ymm14,ymm2
vpxor ymm13,ymm13,ymm1
vpxor ymm12,ymm12,ymm0
vpshufb ymm15,ymm15,ymm8
vpshufb ymm14,ymm14,ymm8
vpshufb ymm13,ymm13,ymm8
vpshufb ymm12,ymm12,ymm8
; c += d (the spilled row c is added straight from memory); b ^= c
vpaddd ymm11,ymm11,ymm15
vpaddd ymm10,ymm10,ymm14
vpaddd ymm9,ymm9,ymm13
vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp]
vpxor ymm7,ymm7,ymm11
vpxor ymm6,ymm6,ymm10
vpxor ymm5,ymm5,ymm9
vpxor ymm4,ymm4,ymm8
; b <<<= 12, using ymm8 as the temporary after spilling row c again.
vmovdqa YMMWORD[(160+128)+rbp],ymm8
vpsrld ymm8,ymm7,20
vpslld ymm7,ymm7,32-20
vpxor ymm7,ymm7,ymm8
vpsrld ymm8,ymm6,20
vpslld ymm6,ymm6,32-20
vpxor ymm6,ymm6,ymm8
vpsrld ymm8,ymm5,20
vpslld ymm5,ymm5,32-20
vpxor ymm5,ymm5,ymm8
vpsrld ymm8,ymm4,20
vpslld ymm4,ymm4,32-20
vpxor ymm4,ymm4,ymm8
; a += b; d ^= a; d <<<= 8
vmovdqa ymm8,YMMWORD[$L$rol8]
vpaddd ymm3,ymm3,ymm7
vpaddd ymm2,ymm2,ymm6
vpaddd ymm1,ymm1,ymm5
vpaddd ymm0,ymm0,ymm4
vpxor ymm15,ymm15,ymm3
vpxor ymm14,ymm14,ymm2
vpxor ymm13,ymm13,ymm1
vpxor ymm12,ymm12,ymm0
vpshufb ymm15,ymm15,ymm8
vpshufb ymm14,ymm14,ymm8
vpshufb ymm13,ymm13,ymm8
vpshufb ymm12,ymm12,ymm8
; c += d; b ^= c; b <<<= 7
vpaddd ymm11,ymm11,ymm15
vpaddd ymm10,ymm10,ymm14
vpaddd ymm9,ymm9,ymm13
vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp]
vpxor ymm7,ymm7,ymm11
vpxor ymm6,ymm6,ymm10
vpxor ymm5,ymm5,ymm9
vpxor ymm4,ymm4,ymm8
vmovdqa YMMWORD[(160+128)+rbp],ymm8
vpsrld ymm8,ymm7,25
vpslld ymm7,ymm7,32-25
vpxor ymm7,ymm7,ymm8
vpsrld ymm8,ymm6,25
vpslld ymm6,ymm6,32-25
vpxor ymm6,ymm6,ymm8
vpsrld ymm8,ymm5,25
vpslld ymm5,ymm5,32-25
vpxor ymm5,ymm5,ymm8
vpsrld ymm8,ymm4,25
vpslld ymm4,ymm4,32-25
vpxor ymm4,ymm4,ymm8
; Reload row c, then rotate rows b, c, d within each 128-bit lane by
; 4, 8 and 12 bytes for the second half round.
vmovdqa ymm8,YMMWORD[((160+128))+rbp]
vpalignr ymm7,ymm7,ymm7,4
vpalignr ymm11,ymm11,ymm11,8
vpalignr ymm15,ymm15,ymm15,12
vpalignr ymm6,ymm6,ymm6,4
vpalignr ymm10,ymm10,ymm10,8
vpalignr ymm14,ymm14,ymm14,12
vpalignr ymm5,ymm5,ymm5,4
vpalignr ymm9,ymm9,ymm9,8
vpalignr ymm13,ymm13,ymm13,12
vpalignr ymm4,ymm4,ymm4,4
vpalignr ymm8,ymm8,ymm8,8
vpalignr ymm12,ymm12,ymm12,12
; --- second half round: a += b; d ^= a; d <<<= 16 ---
vmovdqa YMMWORD[(160+128)+rbp],ymm8
vmovdqa ymm8,YMMWORD[$L$rol16]
vpaddd ymm3,ymm3,ymm7
vpaddd ymm2,ymm2,ymm6
vpaddd ymm1,ymm1,ymm5
vpaddd ymm0,ymm0,ymm4
vpxor ymm15,ymm15,ymm3
vpxor ymm14,ymm14,ymm2
vpxor ymm13,ymm13,ymm1
vpxor ymm12,ymm12,ymm0
vpshufb ymm15,ymm15,ymm8
vpshufb ymm14,ymm14,ymm8
vpshufb ymm13,ymm13,ymm8
vpshufb ymm12,ymm12,ymm8
; c += d; b ^= c; b <<<= 12
vpaddd ymm11,ymm11,ymm15
vpaddd ymm10,ymm10,ymm14
vpaddd ymm9,ymm9,ymm13
vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp]
vpxor ymm7,ymm7,ymm11
vpxor ymm6,ymm6,ymm10
vpxor ymm5,ymm5,ymm9
vpxor ymm4,ymm4,ymm8
vmovdqa YMMWORD[(160+128)+rbp],ymm8
vpsrld ymm8,ymm7,20
vpslld ymm7,ymm7,32-20
vpxor ymm7,ymm7,ymm8
vpsrld ymm8,ymm6,20
vpslld ymm6,ymm6,32-20
vpxor ymm6,ymm6,ymm8
vpsrld ymm8,ymm5,20
vpslld ymm5,ymm5,32-20
vpxor ymm5,ymm5,ymm8
vpsrld ymm8,ymm4,20
vpslld ymm4,ymm4,32-20
vpxor ymm4,ymm4,ymm8
; a += b; d ^= a; d <<<= 8
vmovdqa ymm8,YMMWORD[$L$rol8]
vpaddd ymm3,ymm3,ymm7
vpaddd ymm2,ymm2,ymm6
vpaddd ymm1,ymm1,ymm5
vpaddd ymm0,ymm0,ymm4
vpxor ymm15,ymm15,ymm3
vpxor ymm14,ymm14,ymm2
vpxor ymm13,ymm13,ymm1
vpxor ymm12,ymm12,ymm0
vpshufb ymm15,ymm15,ymm8
vpshufb ymm14,ymm14,ymm8
vpshufb ymm13,ymm13,ymm8
vpshufb ymm12,ymm12,ymm8
; c += d; b ^= c; b <<<= 7
vpaddd ymm11,ymm11,ymm15
vpaddd ymm10,ymm10,ymm14
vpaddd ymm9,ymm9,ymm13
vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp]
vpxor ymm7,ymm7,ymm11
vpxor ymm6,ymm6,ymm10
vpxor ymm5,ymm5,ymm9
vpxor ymm4,ymm4,ymm8
vmovdqa YMMWORD[(160+128)+rbp],ymm8
vpsrld ymm8,ymm7,25
vpslld ymm7,ymm7,32-25
vpxor ymm7,ymm7,ymm8
vpsrld ymm8,ymm6,25
vpslld ymm6,ymm6,32-25
vpxor ymm6,ymm6,ymm8
vpsrld ymm8,ymm5,25
vpslld ymm5,ymm5,32-25
vpxor ymm5,ymm5,ymm8
vpsrld ymm8,ymm4,25
vpslld ymm4,ymm4,32-25
vpxor ymm4,ymm4,ymm8
; Reload row c and undo the lane rotation (12/8/4 complements 4/8/12).
vmovdqa ymm8,YMMWORD[((160+128))+rbp]
vpalignr ymm7,ymm7,ymm7,12
vpalignr ymm11,ymm11,ymm11,8
vpalignr ymm15,ymm15,ymm15,4
vpalignr ymm6,ymm6,ymm6,12
vpalignr ymm10,ymm10,ymm10,8
vpalignr ymm14,ymm14,ymm14,4
vpalignr ymm5,ymm5,ymm5,12
vpalignr ymm9,ymm9,ymm9,8
vpalignr ymm13,ymm13,ymm13,4
vpalignr ymm4,ymm4,ymm4,12
vpalignr ymm8,ymm8,ymm8,8
vpalignr ymm12,ymm12,ymm12,4
dec r10
jnz NEAR $L$seal_avx2_init_rounds
; After the initial rounds of the seal path: finish the four states, derive
; the Poly1305 key area, hash the additional data, XOR the first 320 (and,
; if enough input remains, 448) bytes, then dispatch on the remaining length.
; Feed-forward: add the saved input rows; counter rows come from
; [160+256+rbp], [160+224+rbp], [160+192+rbp] and [160+160+rbp].
vpaddd ymm3,ymm3,YMMWORD[$L$chacha20_consts]
vpaddd ymm7,ymm7,YMMWORD[((160+64))+rbp]
vpaddd ymm11,ymm11,YMMWORD[((160+96))+rbp]
vpaddd ymm15,ymm15,YMMWORD[((160+256))+rbp]
vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts]
vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp]
vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp]
vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp]
vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts]
vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp]
vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp]
vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp]
vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts]
vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp]
vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp]
vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp]
; State 3: the high lanes give 64 bytes of keystream (ymm3, ymm11). The low
; lanes of rows a and b (32 bytes) are masked with $L$clamp and stored at
; [160+rbp], the multiplier/key area read by the Poly1305 code. The low
; lanes of rows c and d are not used.
vperm2i128 ymm11,ymm15,ymm11,0x13
vperm2i128 ymm15,ymm7,ymm3,0x02
vperm2i128 ymm3,ymm7,ymm3,0x13
vpand ymm15,ymm15,YMMWORD[$L$clamp]
vmovdqa YMMWORD[(160+0)+rbp],ymm15
; Register-to-itself move with no architectural effect; presumably an
; artefact of the generator assigning both operands to r8.
mov r8,r8
; Hash the additional data (routine defined outside this view).
call poly_hash_ad_internal
; Bytes 0..63: XOR with the high-lane keystream of state 3.
vpxor ymm3,ymm3,YMMWORD[rsi]
vpxor ymm11,ymm11,YMMWORD[32+rsi]
vmovdqu YMMWORD[rdi],ymm3
vmovdqu YMMWORD[32+rdi],ymm11
; Bytes 64..191: state 2 (selector 0x02 pairs the low lanes, 0x13 the high).
vperm2i128 ymm15,ymm6,ymm2,0x02
vperm2i128 ymm6,ymm6,ymm2,0x13
vperm2i128 ymm2,ymm14,ymm10,0x02
vperm2i128 ymm10,ymm14,ymm10,0x13
vpxor ymm15,ymm15,YMMWORD[((0+64))+rsi]
vpxor ymm2,ymm2,YMMWORD[((32+64))+rsi]
vpxor ymm6,ymm6,YMMWORD[((64+64))+rsi]
vpxor ymm10,ymm10,YMMWORD[((96+64))+rsi]
vmovdqu YMMWORD[(0+64)+rdi],ymm15
vmovdqu YMMWORD[(32+64)+rdi],ymm2
vmovdqu YMMWORD[(64+64)+rdi],ymm6
vmovdqu YMMWORD[(96+64)+rdi],ymm10
; Bytes 192..319: state 1.
vperm2i128 ymm15,ymm5,ymm1,0x02
vperm2i128 ymm5,ymm5,ymm1,0x13
vperm2i128 ymm1,ymm13,ymm9,0x02
vperm2i128 ymm9,ymm13,ymm9,0x13
vpxor ymm15,ymm15,YMMWORD[((0+192))+rsi]
vpxor ymm1,ymm1,YMMWORD[((32+192))+rsi]
vpxor ymm5,ymm5,YMMWORD[((64+192))+rsi]
vpxor ymm9,ymm9,YMMWORD[((96+192))+rsi]
vmovdqu YMMWORD[(0+192)+rdi],ymm15
vmovdqu YMMWORD[(32+192)+rdi],ymm1
vmovdqu YMMWORD[(64+192)+rdi],ymm5
vmovdqu YMMWORD[(96+192)+rdi],ymm9
; State 0 -> keystream queue ymm0, ymm4, ymm8, ymm12 (ymm15 is a temporary).
vperm2i128 ymm15,ymm4,ymm0,0x13
vperm2i128 ymm0,ymm4,ymm0,0x02
vperm2i128 ymm4,ymm12,ymm8,0x02
vperm2i128 ymm12,ymm12,ymm8,0x13
vmovdqa ymm8,ymm15
; 320 input bytes consumed. Only rsi and rbx are updated; rdi is left
; unchanged here, and rcx = 320 is passed on to the next stage.
lea rsi,[320+rsi]
sub rbx,10*32
mov rcx,10*32
; With at most 128 bytes left, the queued keystream is enough.
cmp rbx,4*32
jbe NEAR $L$seal_avx2_short_hash_remainder
; Otherwise use the queued 128 bytes now; the output goes to rdi+320
; because rdi has not been advanced.
vpxor ymm0,ymm0,YMMWORD[rsi]
vpxor ymm4,ymm4,YMMWORD[32+rsi]
vpxor ymm8,ymm8,YMMWORD[64+rsi]
vpxor ymm12,ymm12,YMMWORD[96+rsi]
vmovdqu YMMWORD[320+rdi],ymm0
vmovdqu YMMWORD[352+rdi],ymm4
vmovdqu YMMWORD[384+rdi],ymm8
vmovdqu YMMWORD[416+rdi],ymm12
lea rsi,[128+rsi]
sub rbx,4*32
; rcx = 8 and r8 = 2 are parameters for the code that follows (tail paths
; or main loop, both outside this view).
mov rcx,8
mov r8,2
; Dispatch on the remaining length: up to 128 / 256 / 384 / 512 bytes go to
; the matching tail path; anything longer falls through.
cmp rbx,4*32
jbe NEAR $L$seal_avx2_tail_128
cmp rbx,8*32
jbe NEAR $L$seal_avx2_tail_256
cmp rbx,12*32
jbe NEAR $L$seal_avx2_tail_384
cmp rbx,16*32
jbe NEAR $L$seal_avx2_tail_512
vmovdqa ymm0,YMMWORD[$L$chacha20_consts]
vmovdqa ymm4,YMMWORD[((160+64))+rbp]
vmovdqa ymm8,YMMWORD[((160+96))+rbp]
vmovdqa ymm1,ymm0
vmovdqa ymm5,ymm4
vmovdqa ymm9,ymm8
vmovdqa ymm2,ymm0
vmovdqa ymm6,ymm4
vmovdqa ymm10,ymm8
vmovdqa ymm3,ymm0
vmovdqa ymm7,ymm4
vmovdqa ymm11,ymm8
vmovdqa ymm12,YMMWORD[$L$avx2_inc]
vpaddd ymm15,ymm12,YMMWORD[((160+160))+rbp]
vpaddd ymm14,ymm12,ymm15
vpaddd ymm13,ymm12,ymm14
vpaddd ymm12,ymm12,ymm13
vmovdqa YMMWORD[(160+256)+rbp],ymm15
vmovdqa YMMWORD[(160+224)+rbp],ymm14
vmovdqa YMMWORD[(160+192)+rbp],ymm13
vmovdqa YMMWORD[(160+160)+rbp],ymm12
vmovdqa YMMWORD[(160+128)+rbp],ymm8
vmovdqa ymm8,YMMWORD[$L$rol16]
vpaddd ymm3,ymm3,ymm7
vpaddd ymm2,ymm2,ymm6
vpaddd ymm1,ymm1,ymm5
vpaddd ymm0,ymm0,ymm4
vpxor ymm15,ymm15,ymm3
vpxor ymm14,ymm14,ymm2
vpxor ymm13,ymm13,ymm1
vpxor ymm12,ymm12,ymm0
vpshufb ymm15,ymm15,ymm8
vpshufb ymm14,ymm14,ymm8
vpshufb ymm13,ymm13,ymm8
vpshufb ymm12,ymm12,ymm8
vpaddd ymm11,ymm11,ymm15
vpaddd ymm10,ymm10,ymm14
vpaddd ymm9,ymm9,ymm13
vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp]
vpxor ymm7,ymm7,ymm11
vpxor ymm6,ymm6,ymm10
vpxor ymm5,ymm5,ymm9
vpxor ymm4,ymm4,ymm8
vmovdqa YMMWORD[(160+128)+rbp],ymm8
vpsrld ymm8,ymm7,20
vpslld ymm7,ymm7,32-20
vpxor ymm7,ymm7,ymm8
vpsrld ymm8,ymm6,20
vpslld ymm6,ymm6,32-20
vpxor ymm6,ymm6,ymm8
vpsrld ymm8,ymm5,20
vpslld ymm5,ymm5,32-20
vpxor ymm5,ymm5,ymm8
vpsrld ymm8,ymm4,20
vpslld ymm4,ymm4,32-20
vpxor ymm4,ymm4,ymm8
vmovdqa ymm8,YMMWORD[$L$rol8]
vpaddd ymm3,ymm3,ymm7
vpaddd ymm2,ymm2,ymm6
vpaddd ymm1,ymm1,ymm5
vpaddd ymm0,ymm0,ymm4
vpxor ymm15,ymm15,ymm3
vpxor ymm14,ymm14,ymm2
vpxor ymm13,ymm13,ymm1
vpxor ymm12,ymm12,ymm0
vpshufb ymm15,ymm15,ymm8
vpshufb ymm14,ymm14,ymm8
vpshufb ymm13,ymm13,ymm8
vpshufb ymm12,ymm12,ymm8
vpaddd ymm11,ymm11,ymm15
vpaddd ymm10,ymm10,ymm14
vpaddd ymm9,ymm9,ymm13
vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp]
vpxor ymm7,ymm7,ymm11
vpxor ymm6,ymm6,ymm10
vpxor ymm5,ymm5,ymm9
vpxor ymm4,ymm4,ymm8
vmovdqa YMMWORD[(160+128)+rbp],ymm8
vpsrld ymm8,ymm7,25
vpslld ymm7,ymm7,32-25
vpxor ymm7,ymm7,ymm8
vpsrld ymm8,ymm6,25
vpslld ymm6,ymm6,32-25
vpxor ymm6,ymm6,ymm8
vpsrld ymm8,ymm5,25
vpslld ymm5,ymm5,32-25
vpxor ymm5,ymm5,ymm8
vpsrld ymm8,ymm4,25
vpslld ymm4,ymm4,32-25
vpxor ymm4,ymm4,ymm8
vmovdqa ymm8,YMMWORD[((160+128))+rbp]
vpalignr ymm7,ymm7,ymm7,4
vpalignr ymm11,ymm11,ymm11,8
vpalignr ymm15,ymm15,ymm15,12
vpalignr ymm6,ymm6,ymm6,4
vpalignr ymm10,ymm10,ymm10,8
vpalignr ymm14,ymm14,ymm14,12
vpalignr ymm5,ymm5,ymm5,4
vpalignr ymm9,ymm9,ymm9,8
vpalignr ymm13,ymm13,ymm13,12
vpalignr ymm4,ymm4,ymm4,4
vpalignr ymm8,ymm8,ymm8,8
vpalignr ymm12,ymm12,ymm12,12
vmovdqa YMMWORD[(160+128)+rbp],ymm8
vmovdqa ymm8,YMMWORD[$L$rol16]
vpaddd ymm3,ymm3,ymm7
vpaddd ymm2,ymm2,ymm6
vpaddd ymm1,ymm1,ymm5
vpaddd ymm0,ymm0,ymm4
vpxor ymm15,ymm15,ymm3
vpxor ymm14,ymm14,ymm2
vpxor ymm13,ymm13,ymm1
vpxor ymm12,ymm12,ymm0
vpshufb ymm15,ymm15,ymm8
vpshufb ymm14,ymm14,ymm8
vpshufb ymm13,ymm13,ymm8
vpshufb ymm12,ymm12,ymm8
vpaddd ymm11,ymm11,ymm15
vpaddd ymm10,ymm10,ymm14
vpaddd ymm9,ymm9,ymm13
vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp]
vpxor ymm7,ymm7,ymm11
vpxor ymm6,ymm6,ymm10
vpxor ymm5,ymm5,ymm9
vpxor ymm4,ymm4,ymm8
vmovdqa YMMWORD[(160+128)+rbp],ymm8
vpsrld ymm8,ymm7,20
vpslld ymm7,ymm7,32-20
vpxor ymm7,ymm7,ymm8
vpsrld ymm8,ymm6,20
vpslld ymm6,ymm6,32-20
vpxor ymm6,ymm6,ymm8
vpsrld ymm8,ymm5,20
vpslld ymm5,ymm5,32-20
vpxor ymm5,ymm5,ymm8
vpsrld ymm8,ymm4,20
vpslld ymm4,ymm4,32-20
vpxor ymm4,ymm4,ymm8
vmovdqa ymm8,YMMWORD[$L$rol8]
vpaddd ymm3,ymm3,ymm7
vpaddd ymm2,ymm2,ymm6
vpaddd ymm1,ymm1,ymm5
vpaddd ymm0,ymm0,ymm4
vpxor ymm15,ymm15,ymm3
vpxor ymm14,ymm14,ymm2
vpxor ymm13,ymm13,ymm1
vpxor ymm12,ymm12,ymm0
vpshufb ymm15,ymm15,ymm8
vpshufb ymm14,ymm14,ymm8
vpshufb ymm13,ymm13,ymm8
vpshufb ymm12,ymm12,ymm8
vpaddd ymm11,ymm11,ymm15
vpaddd ymm10,ymm10,ymm14
vpaddd ymm9,ymm9,ymm13
vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp]
vpxor ymm7,ymm7,ymm11
vpxor ymm6,ymm6,ymm10
vpxor ymm5,ymm5,ymm9
vpxor ymm4,ymm4,ymm8
vmovdqa YMMWORD[(160+128)+rbp],ymm8
vpsrld ymm8,ymm7,25
vpslld ymm7,ymm7,32-25
vpxor ymm7,ymm7,ymm8
vpsrld ymm8,ymm6,25
vpslld ymm6,ymm6,32-25
vpxor ymm6,ymm6,ymm8
vpsrld ymm8,ymm5,25
vpslld ymm5,ymm5,32-25
vpxor ymm5,ymm5,ymm8
vpsrld ymm8,ymm4,25
vpslld ymm4,ymm4,32-25
vpxor ymm4,ymm4,ymm8
vmovdqa ymm8,YMMWORD[((160+128))+rbp]
vpalignr ymm7,ymm7,ymm7,12
vpalignr ymm11,ymm11,ymm11,8
vpalignr ymm15,ymm15,ymm15,4
vpalignr ymm6,ymm6,ymm6,12
vpalignr ymm10,ymm10,ymm10,8
vpalignr ymm14,ymm14,ymm14,4
vpalignr ymm5,ymm5,ymm5,12
vpalignr ymm9,ymm9,ymm9,8
vpalignr ymm13,ymm13,ymm13,4
vpalignr ymm4,ymm4,ymm4,12
vpalignr ymm8,ymm8,ymm8,8
vpalignr ymm12,ymm12,ymm12,4
vmovdqa YMMWORD[(160+128)+rbp],ymm8
vmovdqa ymm8,YMMWORD[$L$rol16]
vpaddd ymm3,ymm3,ymm7
vpaddd ymm2,ymm2,ymm6
vpaddd ymm1,ymm1,ymm5
vpaddd ymm0,ymm0,ymm4
vpxor ymm15,ymm15,ymm3
vpxor ymm14,ymm14,ymm2
vpxor ymm13,ymm13,ymm1
vpxor ymm12,ymm12,ymm0
vpshufb ymm15,ymm15,ymm8
vpshufb ymm14,ymm14,ymm8
vpshufb ymm13,ymm13,ymm8
vpshufb ymm12,ymm12,ymm8
vpaddd ymm11,ymm11,ymm15
vpaddd ymm10,ymm10,ymm14
vpaddd ymm9,ymm9,ymm13
vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp]
vpxor ymm7,ymm7,ymm11
vpxor ymm6,ymm6,ymm10
vpxor ymm5,ymm5,ymm9
vpxor ymm4,ymm4,ymm8
vmovdqa YMMWORD[(160+128)+rbp],ymm8
vpsrld ymm8,ymm7,20
vpslld ymm7,ymm7,32-20
vpxor ymm7,ymm7,ymm8
vpsrld ymm8,ymm6,20
vpslld ymm6,ymm6,32-20
vpxor ymm6,ymm6,ymm8
vpsrld ymm8,ymm5,20
vpslld ymm5,ymm5,32-20
vpxor ymm5,ymm5,ymm8
vpsrld ymm8,ymm4,20
vpslld ymm4,ymm4,32-20
vpxor ymm4,ymm4,ymm8
vmovdqa ymm8,YMMWORD[$L$rol8]
vpaddd ymm3,ymm3,ymm7
vpaddd ymm2,ymm2,ymm6
vpaddd ymm1,ymm1,ymm5
vpaddd ymm0,ymm0,ymm4
vpxor ymm15,ymm15,ymm3
sub rdi,16
mov rcx,9
jmp NEAR $L$seal_avx2_main_loop_rounds_entry
ALIGN 32
; Top of the main loop: build four fresh ChaCha20 working states. Each ymm
; register carries one state row for two blocks (one block per 128-bit lane):
;   ymm0-ymm3   = constants row ($L$chacha20_consts)
;   ymm4-ymm7   = row kept at [160+64+rbp]
;   ymm8-ymm11  = row kept at [160+96+rbp]
;   ymm12-ymm15 = counter rows, derived below
$L$seal_avx2_main_loop:
vmovdqa ymm0,YMMWORD[$L$chacha20_consts]
vmovdqa ymm4,YMMWORD[((160+64))+rbp]
vmovdqa ymm8,YMMWORD[((160+96))+rbp]
vmovdqa ymm1,ymm0
vmovdqa ymm5,ymm4
vmovdqa ymm9,ymm8
vmovdqa ymm2,ymm0
vmovdqa ymm6,ymm4
vmovdqa ymm10,ymm8
vmovdqa ymm3,ymm0
vmovdqa ymm7,ymm4
vmovdqa ymm11,ymm8
; $L$avx2_inc adds 2 to the first dword of each lane. Starting from the counter
; row saved at [160+160+rbp], ymm15, ymm14, ymm13, ymm12 receive four
; successive counter rows (ymm15 lowest, ymm12 highest).
vmovdqa ymm12,YMMWORD[$L$avx2_inc]
vpaddd ymm15,ymm12,YMMWORD[((160+160))+rbp]
vpaddd ymm14,ymm12,ymm15
vpaddd ymm13,ymm12,ymm14
vpaddd ymm12,ymm12,ymm13
; Keep the initial counter rows; they are added back after the rounds. The slot
; at [160+160+rbp] now holds the highest counter, from which the next state
; setup continues.
vmovdqa YMMWORD[(160+256)+rbp],ymm15
vmovdqa YMMWORD[(160+224)+rbp],ymm14
vmovdqa YMMWORD[(160+192)+rbp],ymm13
vmovdqa YMMWORD[(160+160)+rbp],ymm12
; rcx = number of passes through the rounds loop below
mov rcx,10
ALIGN 32
; First part of one loop pass. Vector side: begins the first half of a double
; round on all four states (a += b, d ^= a, d <<<= 16, c += d, b ^= c,
; b <<<= 12, a += b, then the first d ^= a). Scalar side, interleaved: absorbs
; the 16 bytes at [rdi] into the hash accumulator r12:r11:r10.
; ymm8 (a c row) is parked at [160+128+rbp] whenever the register is needed for
; a shuffle mask or a shift temporary.
$L$seal_avx2_main_loop_rounds:
; acc += 16 bytes at [rdi], plus 2^128 (the 1 added into r12)
add r10,QWORD[((0+0))+rdi]
adc r11,QWORD[((8+0))+rdi]
adc r12,1
vmovdqa YMMWORD[(160+128)+rbp],ymm8
vmovdqa ymm8,YMMWORD[$L$rol16]
vpaddd ymm3,ymm3,ymm7
vpaddd ymm2,ymm2,ymm6
vpaddd ymm1,ymm1,ymm5
vpaddd ymm0,ymm0,ymm4
vpxor ymm15,ymm15,ymm3
vpxor ymm14,ymm14,ymm2
vpxor ymm13,ymm13,ymm1
vpxor ymm12,ymm12,ymm0
; r15:r14:r13 = acc * low qword of the 128-bit multiplier at [160+rbp]
mov rdx,QWORD[((0+160+0))+rbp]
mov r15,rdx
mulx r14,r13,r10
mulx rdx,rax,r11
imul r15,r12
add r14,rax
adc r15,rdx
; byte shuffle by $L$rol16 = rotate every dword left by 16
vpshufb ymm15,ymm15,ymm8
vpshufb ymm14,ymm14,ymm8
vpshufb ymm13,ymm13,ymm8
vpshufb ymm12,ymm12,ymm8
vpaddd ymm11,ymm11,ymm15
vpaddd ymm10,ymm10,ymm14
vpaddd ymm9,ymm9,ymm13
; the fourth c row is the one parked in the scratch slot
vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp]
vpxor ymm7,ymm7,ymm11
; add acc * high qword of the multiplier, one limb up; the full product ends
; up in r9:r15:r14:r13
mov rdx,QWORD[((8+160+0))+rbp]
mulx rax,r10,r10
add r14,r10
mulx r9,r11,r11
adc r15,r11
adc r9,0
imul rdx,r12
vpxor ymm6,ymm6,ymm10
vpxor ymm5,ymm5,ymm9
vpxor ymm4,ymm4,ymm8
vmovdqa YMMWORD[(160+128)+rbp],ymm8
; rotate the b rows left by 12 (shift right 20, shift left 12, xor)
vpsrld ymm8,ymm7,20
vpslld ymm7,ymm7,32-20
vpxor ymm7,ymm7,ymm8
vpsrld ymm8,ymm6,20
vpslld ymm6,ymm6,32-20
vpxor ymm6,ymm6,ymm8
vpsrld ymm8,ymm5,20
vpslld ymm5,ymm5,32-20
add r15,rax
adc r9,rdx
vpxor ymm5,ymm5,ymm8
vpsrld ymm8,ymm4,20
vpslld ymm4,ymm4,32-20
vpxor ymm4,ymm4,ymm8
vmovdqa ymm8,YMMWORD[$L$rol8]
vpaddd ymm3,ymm3,ymm7
vpaddd ymm2,ymm2,ymm6
vpaddd ymm1,ymm1,ymm5
vpaddd ymm0,ymm0,ymm4
vpxor ymm15,ymm15,ymm3
; reduce the product: keep its low 130 bits in r12:r11:r10 and add
; 5 * (product >> 130), formed as (high & -4) + (high >> 2)
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
; Remainder of one loop pass. Also the entry point for the very first, partial
; pass: the code before the main loop executes the vector steps up to this
; point itself, subtracts 16 from rdi and jumps here with rcx = 9, so that pass
; absorbs only two 16-byte blocks instead of three.
; On entry ymm8 holds the $L$rol8 mask and one c row is parked at
; [160+128+rbp]. This part finishes the first half of the double round,
; rotates the rows within each lane, runs the second half, rotates the rows
; back, and meanwhile absorbs the blocks at [16+rdi] and [32+rdi].
$L$seal_avx2_main_loop_rounds_entry:
vpxor ymm14,ymm14,ymm2
vpxor ymm13,ymm13,ymm1
vpxor ymm12,ymm12,ymm0
; byte shuffle by $L$rol8 = rotate every dword left by 8
vpshufb ymm15,ymm15,ymm8
vpshufb ymm14,ymm14,ymm8
vpshufb ymm13,ymm13,ymm8
vpshufb ymm12,ymm12,ymm8
vpaddd ymm11,ymm11,ymm15
vpaddd ymm10,ymm10,ymm14
; acc += 16 bytes at [16+rdi], plus 2^128
add r10,QWORD[((0+16))+rdi]
adc r11,QWORD[((8+16))+rdi]
adc r12,1
vpaddd ymm9,ymm9,ymm13
vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp]
vpxor ymm7,ymm7,ymm11
vpxor ymm6,ymm6,ymm10
vpxor ymm5,ymm5,ymm9
vpxor ymm4,ymm4,ymm8
vmovdqa YMMWORD[(160+128)+rbp],ymm8
; rotate the b rows left by 7 (shift right 25, shift left 7, xor)
vpsrld ymm8,ymm7,25
; r15:r14:r13 = acc * low qword of the multiplier at [160+rbp]
mov rdx,QWORD[((0+160+0))+rbp]
mov r15,rdx
mulx r14,r13,r10
mulx rdx,rax,r11
imul r15,r12
add r14,rax
adc r15,rdx
vpslld ymm7,ymm7,32-25
vpxor ymm7,ymm7,ymm8
vpsrld ymm8,ymm6,25
vpslld ymm6,ymm6,32-25
vpxor ymm6,ymm6,ymm8
vpsrld ymm8,ymm5,25
vpslld ymm5,ymm5,32-25
vpxor ymm5,ymm5,ymm8
vpsrld ymm8,ymm4,25
vpslld ymm4,ymm4,32-25
vpxor ymm4,ymm4,ymm8
vmovdqa ymm8,YMMWORD[((160+128))+rbp]
; rotate rows within each lane (b by 4, c by 8, d by 12 bytes) so the second
; half of the double round works on the diagonals
vpalignr ymm7,ymm7,ymm7,4
vpalignr ymm11,ymm11,ymm11,8
vpalignr ymm15,ymm15,ymm15,12
vpalignr ymm6,ymm6,ymm6,4
vpalignr ymm10,ymm10,ymm10,8
vpalignr ymm14,ymm14,ymm14,12
; add acc * high qword of the multiplier; product in r9:r15:r14:r13
mov rdx,QWORD[((8+160+0))+rbp]
mulx rax,r10,r10
add r14,r10
mulx r9,r11,r11
adc r15,r11
adc r9,0
imul rdx,r12
vpalignr ymm5,ymm5,ymm5,4
vpalignr ymm9,ymm9,ymm9,8
vpalignr ymm13,ymm13,ymm13,12
vpalignr ymm4,ymm4,ymm4,4
vpalignr ymm8,ymm8,ymm8,8
vpalignr ymm12,ymm12,ymm12,12
; second half of the double round
vmovdqa YMMWORD[(160+128)+rbp],ymm8
vmovdqa ymm8,YMMWORD[$L$rol16]
vpaddd ymm3,ymm3,ymm7
vpaddd ymm2,ymm2,ymm6
vpaddd ymm1,ymm1,ymm5
vpaddd ymm0,ymm0,ymm4
vpxor ymm15,ymm15,ymm3
vpxor ymm14,ymm14,ymm2
vpxor ymm13,ymm13,ymm1
vpxor ymm12,ymm12,ymm0
vpshufb ymm15,ymm15,ymm8
vpshufb ymm14,ymm14,ymm8
add r15,rax
adc r9,rdx
vpshufb ymm13,ymm13,ymm8
vpshufb ymm12,ymm12,ymm8
vpaddd ymm11,ymm11,ymm15
vpaddd ymm10,ymm10,ymm14
vpaddd ymm9,ymm9,ymm13
vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp]
vpxor ymm7,ymm7,ymm11
vpxor ymm6,ymm6,ymm10
vpxor ymm5,ymm5,ymm9
; reduce: acc = low 130 bits of the product + 5 * (product >> 130)
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
vpxor ymm4,ymm4,ymm8
vmovdqa YMMWORD[(160+128)+rbp],ymm8
vpsrld ymm8,ymm7,20
vpslld ymm7,ymm7,32-20
vpxor ymm7,ymm7,ymm8
vpsrld ymm8,ymm6,20
vpslld ymm6,ymm6,32-20
vpxor ymm6,ymm6,ymm8
; acc += 16 bytes at [32+rdi], plus 2^128; then step rdi past the three
; blocks consumed by this pass
add r10,QWORD[((0+32))+rdi]
adc r11,QWORD[((8+32))+rdi]
adc r12,1
lea rdi,[48+rdi]
vpsrld ymm8,ymm5,20
vpslld ymm5,ymm5,32-20
vpxor ymm5,ymm5,ymm8
vpsrld ymm8,ymm4,20
vpslld ymm4,ymm4,32-20
vpxor ymm4,ymm4,ymm8
vmovdqa ymm8,YMMWORD[$L$rol8]
vpaddd ymm3,ymm3,ymm7
vpaddd ymm2,ymm2,ymm6
vpaddd ymm1,ymm1,ymm5
vpaddd ymm0,ymm0,ymm4
vpxor ymm15,ymm15,ymm3
vpxor ymm14,ymm14,ymm2
vpxor ymm13,ymm13,ymm1
vpxor ymm12,ymm12,ymm0
vpshufb ymm15,ymm15,ymm8
vpshufb ymm14,ymm14,ymm8
vpshufb ymm13,ymm13,ymm8
; r15:r14:r13 = acc * low qword of the multiplier
mov rdx,QWORD[((0+160+0))+rbp]
mov r15,rdx
mulx r14,r13,r10
mulx rdx,rax,r11
imul r15,r12
add r14,rax
adc r15,rdx
vpshufb ymm12,ymm12,ymm8
vpaddd ymm11,ymm11,ymm15
vpaddd ymm10,ymm10,ymm14
vpaddd ymm9,ymm9,ymm13
vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp]
vpxor ymm7,ymm7,ymm11
vpxor ymm6,ymm6,ymm10
vpxor ymm5,ymm5,ymm9
; add acc * high qword of the multiplier; product in r9:r15:r14:r13
mov rdx,QWORD[((8+160+0))+rbp]
mulx rax,r10,r10
add r14,r10
mulx r9,r11,r11
adc r15,r11
adc r9,0
imul rdx,r12
vpxor ymm4,ymm4,ymm8
vmovdqa YMMWORD[(160+128)+rbp],ymm8
vpsrld ymm8,ymm7,25
vpslld ymm7,ymm7,32-25
vpxor ymm7,ymm7,ymm8
vpsrld ymm8,ymm6,25
vpslld ymm6,ymm6,32-25
vpxor ymm6,ymm6,ymm8
add r15,rax
adc r9,rdx
vpsrld ymm8,ymm5,25
vpslld ymm5,ymm5,32-25
vpxor ymm5,ymm5,ymm8
vpsrld ymm8,ymm4,25
vpslld ymm4,ymm4,32-25
vpxor ymm4,ymm4,ymm8
vmovdqa ymm8,YMMWORD[((160+128))+rbp]
; rotate the rows back (b by 12, c by 8, d by 4 bytes) to column order
vpalignr ymm7,ymm7,ymm7,12
vpalignr ymm11,ymm11,ymm11,8
vpalignr ymm15,ymm15,ymm15,4
vpalignr ymm6,ymm6,ymm6,12
vpalignr ymm10,ymm10,ymm10,8
vpalignr ymm14,ymm14,ymm14,4
vpalignr ymm5,ymm5,ymm5,12
vpalignr ymm9,ymm9,ymm9,8
vpalignr ymm13,ymm13,ymm13,4
vpalignr ymm4,ymm4,ymm4,12
vpalignr ymm8,ymm8,ymm8,8
; reduce: acc = low 130 bits of the product + 5 * (product >> 130)
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
vpalignr ymm12,ymm12,ymm12,4
; next pass until rcx reaches zero
dec rcx
jne NEAR $L$seal_avx2_main_loop_rounds
; End of a main-loop iteration, reached when the rounds loop falls through.
; Adds the initial state back into each of the four working states, absorbs
; 32 more bytes at rdi into the hash, then XORs 512 bytes of input at rsi with
; the keystream and stores the result at rdi.
vpaddd ymm3,ymm3,YMMWORD[$L$chacha20_consts]
vpaddd ymm7,ymm7,YMMWORD[((160+64))+rbp]
vpaddd ymm11,ymm11,YMMWORD[((160+96))+rbp]
vpaddd ymm15,ymm15,YMMWORD[((160+256))+rbp]
vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts]
vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp]
vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp]
vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp]
vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts]
vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp]
vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp]
vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp]
vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts]
vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp]
vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp]
vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp]
; park ymm0 so the register can serve as a temporary for the first permute
vmovdqa YMMWORD[(160+128)+rbp],ymm0
; hash block: acc += 16 bytes at [rdi] + 2^128; acc = acc * multiplier at
; [160+rbp]; reduce to 130 bits plus 5 * (product >> 130)
add r10,QWORD[((0+0))+rdi]
adc r11,QWORD[((8+0))+rdi]
adc r12,1
mov rdx,QWORD[((0+160+0))+rbp]
mov r15,rdx
mulx r14,r13,r10
mulx rdx,rax,r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rdx,QWORD[((8+160+0))+rbp]
mulx rax,r10,r10
add r14,r10
mulx r9,r11,r11
adc r15,r11
adc r9,0
imul rdx,r12
add r15,rax
adc r9,rdx
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
; same again for the 16 bytes at [16+rdi]
add r10,QWORD[((0+16))+rdi]
adc r11,QWORD[((8+16))+rdi]
adc r12,1
mov rdx,QWORD[((0+160+0))+rbp]
mov r15,rdx
mulx r14,r13,r10
mulx rdx,rax,r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rdx,QWORD[((8+160+0))+rbp]
mulx rax,r10,r10
add r14,r10
mulx r9,r11,r11
adc r15,r11
adc r9,0
imul rdx,r12
add r15,rax
adc r9,rdx
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
; rdi now points at the location for this iteration's 512 output bytes
lea rdi,[32+rdi]
; For each state, vperm2i128 with 0x02 gathers the low lanes of two rows and
; 0x13 the high lanes, turning the two-blocks-per-register layout into four
; 32-byte pieces in stream order. The state in ymm3/7/11/15 had the lowest
; counter, so it covers bytes 0..127.
vperm2i128 ymm0,ymm7,ymm3,0x02
vperm2i128 ymm7,ymm7,ymm3,0x13
vperm2i128 ymm3,ymm15,ymm11,0x02
vperm2i128 ymm11,ymm15,ymm11,0x13
vpxor ymm0,ymm0,YMMWORD[((0+0))+rsi]
vpxor ymm3,ymm3,YMMWORD[((32+0))+rsi]
vpxor ymm7,ymm7,YMMWORD[((64+0))+rsi]
vpxor ymm11,ymm11,YMMWORD[((96+0))+rsi]
vmovdqu YMMWORD[(0+0)+rdi],ymm0
vmovdqu YMMWORD[(32+0)+rdi],ymm3
vmovdqu YMMWORD[(64+0)+rdi],ymm7
vmovdqu YMMWORD[(96+0)+rdi],ymm11
; restore the parked row; ymm3 is free now and becomes the temporary
vmovdqa ymm0,YMMWORD[((160+128))+rbp]
; bytes 128..255 from the state in ymm2/6/10/14
vperm2i128 ymm3,ymm6,ymm2,0x02
vperm2i128 ymm6,ymm6,ymm2,0x13
vperm2i128 ymm2,ymm14,ymm10,0x02
vperm2i128 ymm10,ymm14,ymm10,0x13
vpxor ymm3,ymm3,YMMWORD[((0+128))+rsi]
vpxor ymm2,ymm2,YMMWORD[((32+128))+rsi]
vpxor ymm6,ymm6,YMMWORD[((64+128))+rsi]
vpxor ymm10,ymm10,YMMWORD[((96+128))+rsi]
vmovdqu YMMWORD[(0+128)+rdi],ymm3
vmovdqu YMMWORD[(32+128)+rdi],ymm2
vmovdqu YMMWORD[(64+128)+rdi],ymm6
vmovdqu YMMWORD[(96+128)+rdi],ymm10
; bytes 256..383 from the state in ymm1/5/9/13
vperm2i128 ymm3,ymm5,ymm1,0x02
vperm2i128 ymm5,ymm5,ymm1,0x13
vperm2i128 ymm1,ymm13,ymm9,0x02
vperm2i128 ymm9,ymm13,ymm9,0x13
vpxor ymm3,ymm3,YMMWORD[((0+256))+rsi]
vpxor ymm1,ymm1,YMMWORD[((32+256))+rsi]
vpxor ymm5,ymm5,YMMWORD[((64+256))+rsi]
vpxor ymm9,ymm9,YMMWORD[((96+256))+rsi]
vmovdqu YMMWORD[(0+256)+rdi],ymm3
vmovdqu YMMWORD[(32+256)+rdi],ymm1
vmovdqu YMMWORD[(64+256)+rdi],ymm5
vmovdqu YMMWORD[(96+256)+rdi],ymm9
; bytes 384..511 from the state in ymm0/4/8/12
vperm2i128 ymm3,ymm4,ymm0,0x02
vperm2i128 ymm4,ymm4,ymm0,0x13
vperm2i128 ymm0,ymm12,ymm8,0x02
vperm2i128 ymm8,ymm12,ymm8,0x13
vpxor ymm3,ymm3,YMMWORD[((0+384))+rsi]
vpxor ymm0,ymm0,YMMWORD[((32+384))+rsi]
vpxor ymm4,ymm4,YMMWORD[((64+384))+rsi]
vpxor ymm8,ymm8,YMMWORD[((96+384))+rsi]
vmovdqu YMMWORD[(0+384)+rdi],ymm3
vmovdqu YMMWORD[(32+384)+rdi],ymm0
vmovdqu YMMWORD[(64+384)+rdi],ymm4
vmovdqu YMMWORD[(96+384)+rdi],ymm8
; consume 512 input bytes; rdi is not advanced here, the hashing code moves it
; as it absorbs the bytes just stored
lea rsi,[512+rsi]
sub rbx,16*32
; go round again while more than 512 bytes remain
; NOTE(review): jg is a signed comparison on the remaining length; it matches
; an unsigned one only while rbx is below 2^63.
cmp rbx,16*32
jg NEAR $L$seal_avx2_main_loop
; Main-loop exit: at most 512 bytes remain in rbx. Absorb 32 more bytes at rdi
; into the hash, then choose a tail routine by the remaining length.
; hash block: acc (r12:r11:r10) += 16 bytes at [rdi] + 2^128; multiply by the
; 128-bit value at [160+rbp]; reduce to 130 bits plus 5 * (product >> 130)
add r10,QWORD[((0+0))+rdi]
adc r11,QWORD[((8+0))+rdi]
adc r12,1
mov rdx,QWORD[((0+160+0))+rbp]
mov r15,rdx
mulx r14,r13,r10
mulx rdx,rax,r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rdx,QWORD[((8+160+0))+rbp]
mulx rax,r10,r10
add r14,r10
mulx r9,r11,r11
adc r15,r11
adc r9,0
imul rdx,r12
add r15,rax
adc r9,rdx
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
; same again for the 16 bytes at [16+rdi]
add r10,QWORD[((0+16))+rdi]
adc r11,QWORD[((8+16))+rdi]
adc r12,1
mov rdx,QWORD[((0+160+0))+rbp]
mov r15,rdx
mulx r14,r13,r10
mulx rdx,rax,r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rdx,QWORD[((8+160+0))+rbp]
mulx rax,r10,r10
add r14,r10
mulx r9,r11,r11
adc r15,r11
adc r9,0
imul rdx,r12
add r15,rax
adc r9,rdx
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
lea rdi,[32+rdi]
; Tail loop counters: rcx = 10 passes that each absorb three 16-byte blocks,
; r8 = 0 extra passes that absorb two.
mov rcx,10
xor r8,r8
; dispatch (unsigned): more than 384 bytes left -> tail_512, more than 256 ->
; tail_384, more than 128 -> tail_256, otherwise fall through to tail_128
cmp rbx,12*32
ja NEAR $L$seal_avx2_tail_512
cmp rbx,8*32
ja NEAR $L$seal_avx2_tail_384
cmp rbx,4*32
ja NEAR $L$seal_avx2_tail_256
; Tail for at most 128 remaining bytes: set up one ChaCha20 working state in
; ymm0/ymm4/ymm8/ymm12 (two blocks, one per 128-bit lane).
$L$seal_avx2_tail_128:
vmovdqa ymm0,YMMWORD[$L$chacha20_consts]
vmovdqa ymm4,YMMWORD[((160+64))+rbp]
vmovdqa ymm8,YMMWORD[((160+96))+rbp]
; counter row = saved counter row + 2 per lane; write it back so it can be
; added to the state after the rounds
vmovdqa ymm12,YMMWORD[$L$avx2_inc]
vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp]
vmovdqa YMMWORD[(160+160)+rbp],ymm12
; Loop entry for passes that absorb three 16-byte hash blocks: take one block
; here, then fall through into the double round, which absorbs two more.
$L$seal_avx2_tail_128_rounds_and_3xhash:
; acc (r12:r11:r10) += 16 bytes at [rdi], plus 2^128
add r10,QWORD[((0+0))+rdi]
adc r11,QWORD[((8+0))+rdi]
adc r12,1
; r9:r15:r14:r13 = acc * 128-bit multiplier at [160+rbp]
mov rdx,QWORD[((0+160+0))+rbp]
mov r15,rdx
mulx r14,r13,r10
mulx rdx,rax,r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rdx,QWORD[((8+160+0))+rbp]
mulx rax,r10,r10
add r14,r10
mulx r9,r11,r11
adc r15,r11
adc r9,0
imul rdx,r12
add r15,rax
adc r9,rdx
; reduce: acc = low 130 bits of the product + 5 * (product >> 130), the
; latter formed as (high & -4) + (high >> 2)
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
lea rdi,[16+rdi]
; One double round on the single state in ymm0/ymm4/ymm8/ymm12 (ymm3 is the
; shift temporary), with a hash block absorbed after each half: [rdi] and
; [16+rdi]. After the loop the initial state is added back and the 128 bytes
; of keystream are left in registers.
$L$seal_avx2_tail_128_rounds_and_2xhash:
; first half: a += b; d ^= a; d <<<= 16; c += d; b ^= c; b <<<= 12;
;             a += b; d ^= a; d <<<= 8;  c += d; b ^= c; b <<<= 7
vpaddd ymm0,ymm0,ymm4
vpxor ymm12,ymm12,ymm0
vpshufb ymm12,ymm12,YMMWORD[$L$rol16]
vpaddd ymm8,ymm8,ymm12
vpxor ymm4,ymm4,ymm8
vpsrld ymm3,ymm4,20
vpslld ymm4,ymm4,12
vpxor ymm4,ymm4,ymm3
vpaddd ymm0,ymm0,ymm4
vpxor ymm12,ymm12,ymm0
vpshufb ymm12,ymm12,YMMWORD[$L$rol8]
vpaddd ymm8,ymm8,ymm12
vpxor ymm4,ymm4,ymm8
vpslld ymm3,ymm4,7
vpsrld ymm4,ymm4,25
vpxor ymm4,ymm4,ymm3
; rotate rows within each lane so the next half works on the diagonals
vpalignr ymm12,ymm12,ymm12,12
vpalignr ymm8,ymm8,ymm8,8
vpalignr ymm4,ymm4,ymm4,4
; hash block at [rdi]: acc += 16 bytes + 2^128, multiply by the value at
; [160+rbp], reduce
add r10,QWORD[((0+0))+rdi]
adc r11,QWORD[((8+0))+rdi]
adc r12,1
mov rdx,QWORD[((0+160+0))+rbp]
mov r15,rdx
mulx r14,r13,r10
mulx rdx,rax,r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rdx,QWORD[((8+160+0))+rbp]
mulx rax,r10,r10
add r14,r10
mulx r9,r11,r11
adc r15,r11
adc r9,0
imul rdx,r12
add r15,rax
adc r9,rdx
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
; second half of the double round
vpaddd ymm0,ymm0,ymm4
vpxor ymm12,ymm12,ymm0
vpshufb ymm12,ymm12,YMMWORD[$L$rol16]
vpaddd ymm8,ymm8,ymm12
vpxor ymm4,ymm4,ymm8
vpsrld ymm3,ymm4,20
vpslld ymm4,ymm4,12
vpxor ymm4,ymm4,ymm3
vpaddd ymm0,ymm0,ymm4
vpxor ymm12,ymm12,ymm0
vpshufb ymm12,ymm12,YMMWORD[$L$rol8]
vpaddd ymm8,ymm8,ymm12
vpxor ymm4,ymm4,ymm8
vpslld ymm3,ymm4,7
vpsrld ymm4,ymm4,25
vpxor ymm4,ymm4,ymm3
; rotate the rows back to column order
vpalignr ymm12,ymm12,ymm12,4
vpalignr ymm8,ymm8,ymm8,8
vpalignr ymm4,ymm4,ymm4,12
; hash block at [16+rdi]
add r10,QWORD[((0+16))+rdi]
adc r11,QWORD[((8+16))+rdi]
adc r12,1
mov rdx,QWORD[((0+160+0))+rbp]
mov r15,rdx
mulx r14,r13,r10
mulx rdx,rax,r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rdx,QWORD[((8+160+0))+rbp]
mulx rax,r10,r10
add r14,r10
mulx r9,r11,r11
adc r15,r11
adc r9,0
imul rdx,r12
add r15,rax
adc r9,rdx
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
lea rdi,[32+rdi]
; Loop control: while rcx stays positive after the decrement, run a pass that
; absorbs three blocks; once rcx is used up, r8 counts further passes that
; absorb two.
dec rcx
jg NEAR $L$seal_avx2_tail_128_rounds_and_3xhash
dec r8
jge NEAR $L$seal_avx2_tail_128_rounds_and_2xhash
; add the initial state back into the working state
vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts]
vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp]
vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp]
vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp]
; Regroup lanes into stream order: ymm0 and ymm4 receive the low-lane rows
; (first block), ymm8 and ymm12 the high-lane rows (second block).
vperm2i128 ymm3,ymm4,ymm0,0x13
vperm2i128 ymm0,ymm4,ymm0,0x02
vperm2i128 ymm4,ymm12,ymm8,0x02
vperm2i128 ymm12,ymm12,ymm8,0x13
vmovdqa ymm8,ymm3
; presumably $L$seal_avx2_short_loop consumes the keystream in these
; registers; its code is not shown here - confirm there
jmp NEAR $L$seal_avx2_short_loop
; Tail for 129..256 remaining bytes: set up two ChaCha20 working states, in
; ymm0/4/8/12 and ymm1/5/9/13 (each holds two blocks, one per 128-bit lane).
$L$seal_avx2_tail_256:
vmovdqa ymm0,YMMWORD[$L$chacha20_consts]
vmovdqa ymm4,YMMWORD[((160+64))+rbp]
vmovdqa ymm8,YMMWORD[((160+96))+rbp]
vmovdqa ymm1,ymm0
vmovdqa ymm5,ymm4
vmovdqa ymm9,ymm8
; counter rows: ymm13 = saved counter + 2 per lane, ymm12 = ymm13 + 2 per
; lane; both are saved so they can be added back after the rounds
vmovdqa ymm12,YMMWORD[$L$avx2_inc]
vpaddd ymm13,ymm12,YMMWORD[((160+160))+rbp]
vpaddd ymm12,ymm12,ymm13
vmovdqa YMMWORD[(160+160)+rbp],ymm12
vmovdqa YMMWORD[(160+192)+rbp],ymm13
; Loop entry for passes that absorb three 16-byte hash blocks: take one block
; here, then fall through into the double round, which absorbs two more.
; This path multiplies with one-operand mul instead of mulx.
$L$seal_avx2_tail_256_rounds_and_3xhash:
; acc (r12:r11:r10) += 16 bytes at [rdi], plus 2^128
add r10,QWORD[((0+0))+rdi]
adc r11,QWORD[((8+0))+rdi]
adc r12,1
; r15:r14:r13 = acc * low qword of the multiplier at [160+rbp]
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
; add acc * high qword at [168+rbp], one limb up; r10 briefly holds a carry
; limb. The product ends up in r9:r15:r14:r13.
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
imul r9,r12
add r15,r10
adc r9,rdx
; reduce: acc = low 130 bits of the product + 5 * (product >> 130)
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
lea rdi,[16+rdi]
; One double round on both states (ymm0/4/8/12 and ymm1/5/9/13, with ymm3 as
; shift temporary), with a hash block absorbed after each half: [rdi] and
; [16+rdi]. After the loop, the first 128 bytes are encrypted and stored and
; the last 128 bytes of keystream are left in registers.
$L$seal_avx2_tail_256_rounds_and_2xhash:
; first half on state 0, then rotate rows so the next half uses the diagonals
vpaddd ymm0,ymm0,ymm4
vpxor ymm12,ymm12,ymm0
vpshufb ymm12,ymm12,YMMWORD[$L$rol16]
vpaddd ymm8,ymm8,ymm12
vpxor ymm4,ymm4,ymm8
vpsrld ymm3,ymm4,20
vpslld ymm4,ymm4,12
vpxor ymm4,ymm4,ymm3
vpaddd ymm0,ymm0,ymm4
vpxor ymm12,ymm12,ymm0
vpshufb ymm12,ymm12,YMMWORD[$L$rol8]
vpaddd ymm8,ymm8,ymm12
vpxor ymm4,ymm4,ymm8
vpslld ymm3,ymm4,7
vpsrld ymm4,ymm4,25
vpxor ymm4,ymm4,ymm3
vpalignr ymm12,ymm12,ymm12,12
vpalignr ymm8,ymm8,ymm8,8
vpalignr ymm4,ymm4,ymm4,4
; first half on state 1
vpaddd ymm1,ymm1,ymm5
vpxor ymm13,ymm13,ymm1
vpshufb ymm13,ymm13,YMMWORD[$L$rol16]
vpaddd ymm9,ymm9,ymm13
vpxor ymm5,ymm5,ymm9
vpsrld ymm3,ymm5,20
vpslld ymm5,ymm5,12
vpxor ymm5,ymm5,ymm3
vpaddd ymm1,ymm1,ymm5
vpxor ymm13,ymm13,ymm1
vpshufb ymm13,ymm13,YMMWORD[$L$rol8]
vpaddd ymm9,ymm9,ymm13
vpxor ymm5,ymm5,ymm9
vpslld ymm3,ymm5,7
vpsrld ymm5,ymm5,25
vpxor ymm5,ymm5,ymm3
vpalignr ymm13,ymm13,ymm13,12
vpalignr ymm9,ymm9,ymm9,8
vpalignr ymm5,ymm5,ymm5,4
; hash block at [rdi]: acc (r12:r11:r10) += 16 bytes + 2^128, multiply by the
; 128-bit value at [160+rbp], reduce
add r10,QWORD[((0+0))+rdi]
adc r11,QWORD[((8+0))+rdi]
adc r12,1
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
imul r9,r12
add r15,r10
adc r9,rdx
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
; second half on state 0, then rotate its rows back to column order
vpaddd ymm0,ymm0,ymm4
vpxor ymm12,ymm12,ymm0
vpshufb ymm12,ymm12,YMMWORD[$L$rol16]
vpaddd ymm8,ymm8,ymm12
vpxor ymm4,ymm4,ymm8
vpsrld ymm3,ymm4,20
vpslld ymm4,ymm4,12
vpxor ymm4,ymm4,ymm3
vpaddd ymm0,ymm0,ymm4
vpxor ymm12,ymm12,ymm0
vpshufb ymm12,ymm12,YMMWORD[$L$rol8]
vpaddd ymm8,ymm8,ymm12
vpxor ymm4,ymm4,ymm8
vpslld ymm3,ymm4,7
vpsrld ymm4,ymm4,25
vpxor ymm4,ymm4,ymm3
vpalignr ymm12,ymm12,ymm12,4
vpalignr ymm8,ymm8,ymm8,8
vpalignr ymm4,ymm4,ymm4,12
; second half on state 1
vpaddd ymm1,ymm1,ymm5
vpxor ymm13,ymm13,ymm1
vpshufb ymm13,ymm13,YMMWORD[$L$rol16]
vpaddd ymm9,ymm9,ymm13
vpxor ymm5,ymm5,ymm9
vpsrld ymm3,ymm5,20
vpslld ymm5,ymm5,12
vpxor ymm5,ymm5,ymm3
vpaddd ymm1,ymm1,ymm5
vpxor ymm13,ymm13,ymm1
vpshufb ymm13,ymm13,YMMWORD[$L$rol8]
vpaddd ymm9,ymm9,ymm13
vpxor ymm5,ymm5,ymm9
vpslld ymm3,ymm5,7
vpsrld ymm5,ymm5,25
vpxor ymm5,ymm5,ymm3
vpalignr ymm13,ymm13,ymm13,4
vpalignr ymm9,ymm9,ymm9,8
vpalignr ymm5,ymm5,ymm5,12
; hash block at [16+rdi]
add r10,QWORD[((0+16))+rdi]
adc r11,QWORD[((8+16))+rdi]
adc r12,1
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
imul r9,r12
add r15,r10
adc r9,rdx
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
lea rdi,[32+rdi]
; Loop control: while rcx stays positive after the decrement, run a pass that
; absorbs three blocks; once rcx is used up, r8 counts further passes that
; absorb two.
dec rcx
jg NEAR $L$seal_avx2_tail_256_rounds_and_3xhash
dec r8
jge NEAR $L$seal_avx2_tail_256_rounds_and_2xhash
; add the initial state back into both working states
vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts]
vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp]
vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp]
vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp]
vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts]
vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp]
vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp]
vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp]
; State 1 has the lower counter: regroup its lanes into stream order, XOR with
; 128 input bytes at rsi and store them at rdi.
vperm2i128 ymm3,ymm5,ymm1,0x02
vperm2i128 ymm5,ymm5,ymm1,0x13
vperm2i128 ymm1,ymm13,ymm9,0x02
vperm2i128 ymm9,ymm13,ymm9,0x13
vpxor ymm3,ymm3,YMMWORD[((0+0))+rsi]
vpxor ymm1,ymm1,YMMWORD[((32+0))+rsi]
vpxor ymm5,ymm5,YMMWORD[((64+0))+rsi]
vpxor ymm9,ymm9,YMMWORD[((96+0))+rsi]
vmovdqu YMMWORD[(0+0)+rdi],ymm3
vmovdqu YMMWORD[(32+0)+rdi],ymm1
vmovdqu YMMWORD[(64+0)+rdi],ymm5
vmovdqu YMMWORD[(96+0)+rdi],ymm9
; State 0: regroup only, leaving 128 bytes of keystream in ymm0, ymm4, ymm8,
; ymm12 (low-lane rows first, then high-lane rows).
vperm2i128 ymm3,ymm4,ymm0,0x13
vperm2i128 ymm0,ymm4,ymm0,0x02
vperm2i128 ymm4,ymm12,ymm8,0x02
vperm2i128 ymm12,ymm12,ymm8,0x13
vmovdqa ymm8,ymm3
; rcx = 128: presumably the count of just-stored bytes that still have to be
; hashed - confirm at $L$seal_avx2_short_hash_remainder, which is not shown
mov rcx,4*32
lea rsi,[128+rsi]
sub rbx,4*32
jmp NEAR $L$seal_avx2_short_hash_remainder
; Tail for 257..384 remaining bytes: set up three ChaCha20 working states, in
; ymm0/4/8/12, ymm1/5/9/13 and ymm2/6/10/14 (each holds two blocks, one per
; 128-bit lane).
$L$seal_avx2_tail_384:
vmovdqa ymm0,YMMWORD[$L$chacha20_consts]
vmovdqa ymm4,YMMWORD[((160+64))+rbp]
vmovdqa ymm8,YMMWORD[((160+96))+rbp]
vmovdqa ymm1,ymm0
vmovdqa ymm5,ymm4
vmovdqa ymm9,ymm8
vmovdqa ymm2,ymm0
vmovdqa ymm6,ymm4
vmovdqa ymm10,ymm8
; counter rows: ymm14, ymm13, ymm12 = saved counter + 2, + 4, + 6 per lane;
; all three are saved so they can be added back after the rounds
vmovdqa ymm12,YMMWORD[$L$avx2_inc]
vpaddd ymm14,ymm12,YMMWORD[((160+160))+rbp]
vpaddd ymm13,ymm12,ymm14
vpaddd ymm12,ymm12,ymm13
vmovdqa YMMWORD[(160+160)+rbp],ymm12
vmovdqa YMMWORD[(160+192)+rbp],ymm13
vmovdqa YMMWORD[(160+224)+rbp],ymm14
; Loop entry for passes that absorb three 16-byte hash blocks: take one block
; here, then fall through into the double round, which absorbs two more.
; This path multiplies with one-operand mul instead of mulx.
$L$seal_avx2_tail_384_rounds_and_3xhash:
; acc (r12:r11:r10) += 16 bytes at [rdi], plus 2^128
add r10,QWORD[((0+0))+rdi]
adc r11,QWORD[((8+0))+rdi]
adc r12,1
; r15:r14:r13 = acc * low qword of the multiplier at [160+rbp]
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
; add acc * high qword at [168+rbp], one limb up; r10 briefly holds a carry
; limb. The product ends up in r9:r15:r14:r13.
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
imul r9,r12
add r15,r10
adc r9,rdx
; reduce: acc = low 130 bits of the product + 5 * (product >> 130)
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
lea rdi,[16+rdi]
; One double round on three states (ymm0/4/8/12, ymm1/5/9/13, ymm2/6/10/14,
; with ymm3 as shift temporary), interleaved with two hash blocks read from
; [rdi] and [16+rdi]. After the loop, the first 256 bytes are encrypted and
; stored and the last 128 bytes of keystream are left in registers.
$L$seal_avx2_tail_384_rounds_and_2xhash:
; first half on state 0, then rotate rows so the next half uses the diagonals
vpaddd ymm0,ymm0,ymm4
vpxor ymm12,ymm12,ymm0
vpshufb ymm12,ymm12,YMMWORD[$L$rol16]
vpaddd ymm8,ymm8,ymm12
vpxor ymm4,ymm4,ymm8
vpsrld ymm3,ymm4,20
vpslld ymm4,ymm4,12
vpxor ymm4,ymm4,ymm3
vpaddd ymm0,ymm0,ymm4
vpxor ymm12,ymm12,ymm0
vpshufb ymm12,ymm12,YMMWORD[$L$rol8]
vpaddd ymm8,ymm8,ymm12
vpxor ymm4,ymm4,ymm8
vpslld ymm3,ymm4,7
vpsrld ymm4,ymm4,25
vpxor ymm4,ymm4,ymm3
vpalignr ymm12,ymm12,ymm12,12
vpalignr ymm8,ymm8,ymm8,8
vpalignr ymm4,ymm4,ymm4,4
; first half on state 1
vpaddd ymm1,ymm1,ymm5
vpxor ymm13,ymm13,ymm1
vpshufb ymm13,ymm13,YMMWORD[$L$rol16]
vpaddd ymm9,ymm9,ymm13
vpxor ymm5,ymm5,ymm9
vpsrld ymm3,ymm5,20
vpslld ymm5,ymm5,12
vpxor ymm5,ymm5,ymm3
vpaddd ymm1,ymm1,ymm5
vpxor ymm13,ymm13,ymm1
vpshufb ymm13,ymm13,YMMWORD[$L$rol8]
vpaddd ymm9,ymm9,ymm13
vpxor ymm5,ymm5,ymm9
vpslld ymm3,ymm5,7
vpsrld ymm5,ymm5,25
vpxor ymm5,ymm5,ymm3
vpalignr ymm13,ymm13,ymm13,12
vpalignr ymm9,ymm9,ymm9,8
vpalignr ymm5,ymm5,ymm5,4
; hash block at [rdi]: acc (r12:r11:r10) += 16 bytes + 2^128, multiply by the
; 128-bit value at [160+rbp], reduce
add r10,QWORD[((0+0))+rdi]
adc r11,QWORD[((8+0))+rdi]
adc r12,1
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
imul r9,r12
add r15,r10
adc r9,rdx
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
; first half on state 2
vpaddd ymm2,ymm2,ymm6
vpxor ymm14,ymm14,ymm2
vpshufb ymm14,ymm14,YMMWORD[$L$rol16]
vpaddd ymm10,ymm10,ymm14
vpxor ymm6,ymm6,ymm10
vpsrld ymm3,ymm6,20
vpslld ymm6,ymm6,12
vpxor ymm6,ymm6,ymm3
vpaddd ymm2,ymm2,ymm6
vpxor ymm14,ymm14,ymm2
vpshufb ymm14,ymm14,YMMWORD[$L$rol8]
vpaddd ymm10,ymm10,ymm14
vpxor ymm6,ymm6,ymm10
vpslld ymm3,ymm6,7
vpsrld ymm6,ymm6,25
vpxor ymm6,ymm6,ymm3
vpalignr ymm14,ymm14,ymm14,12
vpalignr ymm10,ymm10,ymm10,8
vpalignr ymm6,ymm6,ymm6,4
; second half on state 0, then rotate its rows back to column order
vpaddd ymm0,ymm0,ymm4
vpxor ymm12,ymm12,ymm0
vpshufb ymm12,ymm12,YMMWORD[$L$rol16]
vpaddd ymm8,ymm8,ymm12
vpxor ymm4,ymm4,ymm8
vpsrld ymm3,ymm4,20
vpslld ymm4,ymm4,12
vpxor ymm4,ymm4,ymm3
vpaddd ymm0,ymm0,ymm4
vpxor ymm12,ymm12,ymm0
vpshufb ymm12,ymm12,YMMWORD[$L$rol8]
vpaddd ymm8,ymm8,ymm12
vpxor ymm4,ymm4,ymm8
vpslld ymm3,ymm4,7
vpsrld ymm4,ymm4,25
vpxor ymm4,ymm4,ymm3
vpalignr ymm12,ymm12,ymm12,4
vpalignr ymm8,ymm8,ymm8,8
vpalignr ymm4,ymm4,ymm4,12
; hash block at [16+rdi]
add r10,QWORD[((0+16))+rdi]
adc r11,QWORD[((8+16))+rdi]
adc r12,1
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
imul r9,r12
add r15,r10
adc r9,rdx
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
; second half on state 1
vpaddd ymm1,ymm1,ymm5
vpxor ymm13,ymm13,ymm1
vpshufb ymm13,ymm13,YMMWORD[$L$rol16]
vpaddd ymm9,ymm9,ymm13
vpxor ymm5,ymm5,ymm9
vpsrld ymm3,ymm5,20
vpslld ymm5,ymm5,12
vpxor ymm5,ymm5,ymm3
vpaddd ymm1,ymm1,ymm5
vpxor ymm13,ymm13,ymm1
vpshufb ymm13,ymm13,YMMWORD[$L$rol8]
vpaddd ymm9,ymm9,ymm13
vpxor ymm5,ymm5,ymm9
vpslld ymm3,ymm5,7
vpsrld ymm5,ymm5,25
vpxor ymm5,ymm5,ymm3
vpalignr ymm13,ymm13,ymm13,4
vpalignr ymm9,ymm9,ymm9,8
vpalignr ymm5,ymm5,ymm5,12
; second half on state 2
vpaddd ymm2,ymm2,ymm6
vpxor ymm14,ymm14,ymm2
vpshufb ymm14,ymm14,YMMWORD[$L$rol16]
vpaddd ymm10,ymm10,ymm14
vpxor ymm6,ymm6,ymm10
vpsrld ymm3,ymm6,20
vpslld ymm6,ymm6,12
vpxor ymm6,ymm6,ymm3
vpaddd ymm2,ymm2,ymm6
vpxor ymm14,ymm14,ymm2
vpshufb ymm14,ymm14,YMMWORD[$L$rol8]
vpaddd ymm10,ymm10,ymm14
vpxor ymm6,ymm6,ymm10
vpslld ymm3,ymm6,7
vpsrld ymm6,ymm6,25
vpxor ymm6,ymm6,ymm3
vpalignr ymm14,ymm14,ymm14,4
vpalignr ymm10,ymm10,ymm10,8
vpalignr ymm6,ymm6,ymm6,12
lea rdi,[32+rdi]
; Loop control: while rcx stays positive after the decrement, run a pass that
; absorbs three blocks; once rcx is used up, r8 counts further passes that
; absorb two.
dec rcx
jg NEAR $L$seal_avx2_tail_384_rounds_and_3xhash
dec r8
jge NEAR $L$seal_avx2_tail_384_rounds_and_2xhash
; add the initial state back into all three working states
vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts]
vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp]
vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp]
vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp]
vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts]
vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp]
vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp]
vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp]
vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts]
vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp]
vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp]
vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp]
; State 2 has the lowest counter: regroup lanes into stream order, XOR with
; input bytes 0..127 at rsi and store at rdi.
vperm2i128 ymm3,ymm6,ymm2,0x02
vperm2i128 ymm6,ymm6,ymm2,0x13
vperm2i128 ymm2,ymm14,ymm10,0x02
vperm2i128 ymm10,ymm14,ymm10,0x13
vpxor ymm3,ymm3,YMMWORD[((0+0))+rsi]
vpxor ymm2,ymm2,YMMWORD[((32+0))+rsi]
vpxor ymm6,ymm6,YMMWORD[((64+0))+rsi]
vpxor ymm10,ymm10,YMMWORD[((96+0))+rsi]
vmovdqu YMMWORD[(0+0)+rdi],ymm3
vmovdqu YMMWORD[(32+0)+rdi],ymm2
vmovdqu YMMWORD[(64+0)+rdi],ymm6
vmovdqu YMMWORD[(96+0)+rdi],ymm10
; State 1 covers bytes 128..255.
vperm2i128 ymm3,ymm5,ymm1,0x02
vperm2i128 ymm5,ymm5,ymm1,0x13
vperm2i128 ymm1,ymm13,ymm9,0x02
vperm2i128 ymm9,ymm13,ymm9,0x13
vpxor ymm3,ymm3,YMMWORD[((0+128))+rsi]
vpxor ymm1,ymm1,YMMWORD[((32+128))+rsi]
vpxor ymm5,ymm5,YMMWORD[((64+128))+rsi]
vpxor ymm9,ymm9,YMMWORD[((96+128))+rsi]
vmovdqu YMMWORD[(0+128)+rdi],ymm3
vmovdqu YMMWORD[(32+128)+rdi],ymm1
vmovdqu YMMWORD[(64+128)+rdi],ymm5
vmovdqu YMMWORD[(96+128)+rdi],ymm9
; State 0: regroup only, leaving 128 bytes of keystream in ymm0, ymm4, ymm8,
; ymm12 (low-lane rows first, then high-lane rows).
vperm2i128 ymm3,ymm4,ymm0,0x13
vperm2i128 ymm0,ymm4,ymm0,0x02
vperm2i128 ymm4,ymm12,ymm8,0x02
vperm2i128 ymm12,ymm12,ymm8,0x13
vmovdqa ymm8,ymm3
; rcx = 256: presumably the count of just-stored bytes that still have to be
; hashed - confirm at $L$seal_avx2_short_hash_remainder, which is not shown
mov rcx,8*32
lea rsi,[256+rsi]
sub rbx,8*32
jmp NEAR $L$seal_avx2_short_hash_remainder
; Build four interleaved ChaCha states (rows A/B/C/D in ymm0-3 / ymm4-7 /
; ymm8-11 / ymm12-15) before entering the 512-byte tail round loop.
$L$seal_avx2_tail_512:
; Row A: the "expand 32-byte k" constants, replicated to all four states.
vmovdqa ymm0,YMMWORD[$L$chacha20_consts]
vmovdqa ymm1,ymm0
vmovdqa ymm2,ymm0
vmovdqa ymm3,ymm0
; Row B: loaded from the frame slot at rbp+160+64, replicated.
vmovdqa ymm4,YMMWORD[((160+64))+rbp]
vmovdqa ymm5,ymm4
vmovdqa ymm6,ymm4
vmovdqa ymm7,ymm4
; Row C: loaded from the frame slot at rbp+160+96, replicated.
vmovdqa ymm8,YMMWORD[((160+96))+rbp]
vmovdqa ymm9,ymm8
vmovdqa ymm10,ymm8
vmovdqa ymm11,ymm8
; Row D: four successive counter rows, each $L$avx2_inc past the previous
; one, starting from the value currently stored at rbp+160+160. Each row is
; saved as soon as it is produced; the slot that is read (rbp+160+160) is
; overwritten last, with the newest counter row.
vmovdqa ymm12,YMMWORD[$L$avx2_inc]
vpaddd ymm15,ymm12,YMMWORD[((160+160))+rbp]
vmovdqa YMMWORD[(160+256)+rbp],ymm15
vpaddd ymm14,ymm12,ymm15
vmovdqa YMMWORD[(160+224)+rbp],ymm14
vpaddd ymm13,ymm12,ymm14
vmovdqa YMMWORD[(160+192)+rbp],ymm13
vpaddd ymm12,ymm12,ymm13
vmovdqa YMMWORD[(160+160)+rbp],ymm12
; Absorb one extra 16-byte block at [rdi] into the accumulator r12:r11:r10,
; then fall through into the double-round body that follows. The loop tail
; jumps back here (jg) while rcx is still positive after its decrement.
$L$seal_avx2_tail_512_rounds_and_3xhash:
; acc += 16 bytes at [rdi], plus 1 in the third limb (i.e. bit 128).
add r10,QWORD[((0+0))+rdi]
adc r11,QWORD[((8+0))+rdi]
adc r12,1
; acc *= the 128-bit multiplier stored at [rbp+160]. mulx uses rdx as its
; implicit operand and does not modify flags, so the add/adc chains below
; can be interleaved with it.
mov rdx,QWORD[((0+160+0))+rbp]
mov r15,rdx
mulx r14,r13,r10
mulx rdx,rax,r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rdx,QWORD[((8+160+0))+rbp]
mulx rax,r10,r10
add r14,r10
mulx r9,r11,r11
adc r15,r11
adc r9,0
imul rdx,r12
add r15,rax
adc r9,rdx
; The product is now in r9:r15:r14:r13. Keep its low 130 bits in r12:r11:r10
; and fold the remainder back in as 4*c + c, where c = (r9:r15) >> 2. This is
; a partial reduction modulo 2^130 - 5.
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
; Step past the block that was just absorbed.
lea rdi,[16+rdi]
; One ChaCha double round over four interleaved states, with two 16-byte
; hash-block updates (from [rdi] and [rdi+16]) woven between the vector
; instructions. Rows: A = ymm0-3, B = ymm4-7, C = ymm8-11, D = ymm12-15.
; ymm8 doubles as the scratch/mask register, so its row value lives in the
; frame slot at rbp+160+128 whenever ymm8 is borrowed.
; After the loop exits, the initial state is added back, 384 bytes are
; encrypted from rsi to rdi, and the fourth state's keystream is left in
; ymm0/ymm4/ymm8/ymm12 for $L$seal_avx2_short_hash_remainder.
$L$seal_avx2_tail_512_rounds_and_2xhash:
; --- first half of the double round ---
; Spill row C of state 0 and load the rotate-by-16 shuffle mask.
vmovdqa YMMWORD[(160+128)+rbp],ymm8
vmovdqa ymm8,YMMWORD[$L$rol16]
; a += b; d ^= a; d <<<= 16
vpaddd ymm3,ymm3,ymm7
vpaddd ymm2,ymm2,ymm6
vpaddd ymm1,ymm1,ymm5
vpaddd ymm0,ymm0,ymm4
vpxor ymm15,ymm15,ymm3
vpxor ymm14,ymm14,ymm2
vpxor ymm13,ymm13,ymm1
vpxor ymm12,ymm12,ymm0
vpshufb ymm15,ymm15,ymm8
vpshufb ymm14,ymm14,ymm8
vpshufb ymm13,ymm13,ymm8
vpshufb ymm12,ymm12,ymm8
; c += d (state 0's c comes from the spill slot); b ^= c
vpaddd ymm11,ymm11,ymm15
vpaddd ymm10,ymm10,ymm14
vpaddd ymm9,ymm9,ymm13
vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp]
vpxor ymm7,ymm7,ymm11
vpxor ymm6,ymm6,ymm10
; Hash block 1: acc (r12:r11:r10) += 16 bytes at [rdi], plus bit 128.
add r10,QWORD[((0+0))+rdi]
adc r11,QWORD[((8+0))+rdi]
adc r12,1
vpxor ymm5,ymm5,ymm9
vpxor ymm4,ymm4,ymm8
; b <<<= 12, built from a right shift by 20 and a left shift by 12.
vmovdqa YMMWORD[(160+128)+rbp],ymm8
vpsrld ymm8,ymm7,20
vpslld ymm7,ymm7,32-20
vpxor ymm7,ymm7,ymm8
vpsrld ymm8,ymm6,20
vpslld ymm6,ymm6,32-20
vpxor ymm6,ymm6,ymm8
vpsrld ymm8,ymm5,20
vpslld ymm5,ymm5,32-20
vpxor ymm5,ymm5,ymm8
vpsrld ymm8,ymm4,20
vpslld ymm4,ymm4,32-20
vpxor ymm4,ymm4,ymm8
; Load the rotate-by-8 shuffle mask; a += b
vmovdqa ymm8,YMMWORD[$L$rol8]
vpaddd ymm3,ymm3,ymm7
vpaddd ymm2,ymm2,ymm6
vpaddd ymm1,ymm1,ymm5
vpaddd ymm0,ymm0,ymm4
; Hash block 1: multiply acc by the low quadword of the value at [rbp+160].
mov rdx,QWORD[((0+160+0))+rbp]
mov r15,rdx
mulx r14,r13,r10
mulx rdx,rax,r11
imul r15,r12
add r14,rax
adc r15,rdx
; d ^= a; d <<<= 8; c += d; b ^= c
vpxor ymm15,ymm15,ymm3
vpxor ymm14,ymm14,ymm2
vpxor ymm13,ymm13,ymm1
vpxor ymm12,ymm12,ymm0
vpshufb ymm15,ymm15,ymm8
vpshufb ymm14,ymm14,ymm8
vpshufb ymm13,ymm13,ymm8
vpshufb ymm12,ymm12,ymm8
vpaddd ymm11,ymm11,ymm15
vpaddd ymm10,ymm10,ymm14
vpaddd ymm9,ymm9,ymm13
vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp]
vpxor ymm7,ymm7,ymm11
vpxor ymm6,ymm6,ymm10
vpxor ymm5,ymm5,ymm9
vpxor ymm4,ymm4,ymm8
; b <<<= 7, built from a right shift by 25 and a left shift by 7.
vmovdqa YMMWORD[(160+128)+rbp],ymm8
vpsrld ymm8,ymm7,25
vpslld ymm7,ymm7,32-25
vpxor ymm7,ymm7,ymm8
; Hash block 1: multiply acc by the high quadword of the value at [rbp+160].
mov rdx,QWORD[((8+160+0))+rbp]
mulx rax,r10,r10
add r14,r10
mulx r9,r11,r11
adc r15,r11
adc r9,0
imul rdx,r12
vpsrld ymm8,ymm6,25
vpslld ymm6,ymm6,32-25
vpxor ymm6,ymm6,ymm8
vpsrld ymm8,ymm5,25
vpslld ymm5,ymm5,32-25
vpxor ymm5,ymm5,ymm8
vpsrld ymm8,ymm4,25
vpslld ymm4,ymm4,32-25
vpxor ymm4,ymm4,ymm8
; Reload row C of state 0, then rotate rows B/C/D by 4/8/12 bytes within
; each 128-bit lane so the next half round works on the diagonals.
vmovdqa ymm8,YMMWORD[((160+128))+rbp]
vpalignr ymm7,ymm7,ymm7,4
vpalignr ymm11,ymm11,ymm11,8
vpalignr ymm15,ymm15,ymm15,12
vpalignr ymm6,ymm6,ymm6,4
vpalignr ymm10,ymm10,ymm10,8
vpalignr ymm14,ymm14,ymm14,12
vpalignr ymm5,ymm5,ymm5,4
vpalignr ymm9,ymm9,ymm9,8
vpalignr ymm13,ymm13,ymm13,12
vpalignr ymm4,ymm4,ymm4,4
; Hash block 1: last partial products; the product is now r9:r15:r14:r13.
add r15,rax
adc r9,rdx
vpalignr ymm8,ymm8,ymm8,8
vpalignr ymm12,ymm12,ymm12,12
; --- second half of the double round (same steps on the rotated rows) ---
vmovdqa YMMWORD[(160+128)+rbp],ymm8
vmovdqa ymm8,YMMWORD[$L$rol16]
vpaddd ymm3,ymm3,ymm7
vpaddd ymm2,ymm2,ymm6
vpaddd ymm1,ymm1,ymm5
vpaddd ymm0,ymm0,ymm4
vpxor ymm15,ymm15,ymm3
vpxor ymm14,ymm14,ymm2
vpxor ymm13,ymm13,ymm1
vpxor ymm12,ymm12,ymm0
vpshufb ymm15,ymm15,ymm8
vpshufb ymm14,ymm14,ymm8
vpshufb ymm13,ymm13,ymm8
vpshufb ymm12,ymm12,ymm8
vpaddd ymm11,ymm11,ymm15
vpaddd ymm10,ymm10,ymm14
vpaddd ymm9,ymm9,ymm13
vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp]
; Hash block 1: keep the low 130 bits of the product in r12:r11:r10 and fold
; the rest back in as 4*c + c, c = (r9:r15) >> 2 (reduction mod 2^130 - 5).
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
vpxor ymm7,ymm7,ymm11
vpxor ymm6,ymm6,ymm10
vpxor ymm5,ymm5,ymm9
vpxor ymm4,ymm4,ymm8
vmovdqa YMMWORD[(160+128)+rbp],ymm8
vpsrld ymm8,ymm7,20
vpslld ymm7,ymm7,32-20
vpxor ymm7,ymm7,ymm8
vpsrld ymm8,ymm6,20
vpslld ymm6,ymm6,32-20
vpxor ymm6,ymm6,ymm8
vpsrld ymm8,ymm5,20
vpslld ymm5,ymm5,32-20
vpxor ymm5,ymm5,ymm8
vpsrld ymm8,ymm4,20
vpslld ymm4,ymm4,32-20
vpxor ymm4,ymm4,ymm8
vmovdqa ymm8,YMMWORD[$L$rol8]
vpaddd ymm3,ymm3,ymm7
vpaddd ymm2,ymm2,ymm6
; Hash block 2: acc += 16 bytes at [rdi+16], plus bit 128.
add r10,QWORD[((0+16))+rdi]
adc r11,QWORD[((8+16))+rdi]
adc r12,1
vpaddd ymm1,ymm1,ymm5
vpaddd ymm0,ymm0,ymm4
vpxor ymm15,ymm15,ymm3
vpxor ymm14,ymm14,ymm2
vpxor ymm13,ymm13,ymm1
vpxor ymm12,ymm12,ymm0
vpshufb ymm15,ymm15,ymm8
vpshufb ymm14,ymm14,ymm8
vpshufb ymm13,ymm13,ymm8
vpshufb ymm12,ymm12,ymm8
vpaddd ymm11,ymm11,ymm15
vpaddd ymm10,ymm10,ymm14
vpaddd ymm9,ymm9,ymm13
vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp]
vpxor ymm7,ymm7,ymm11
vpxor ymm6,ymm6,ymm10
vpxor ymm5,ymm5,ymm9
vpxor ymm4,ymm4,ymm8
vmovdqa YMMWORD[(160+128)+rbp],ymm8
vpsrld ymm8,ymm7,25
; Hash block 2: multiply by the low quadword of the value at [rbp+160].
mov rdx,QWORD[((0+160+0))+rbp]
mov r15,rdx
mulx r14,r13,r10
mulx rdx,rax,r11
imul r15,r12
add r14,rax
adc r15,rdx
vpslld ymm7,ymm7,32-25
vpxor ymm7,ymm7,ymm8
vpsrld ymm8,ymm6,25
vpslld ymm6,ymm6,32-25
vpxor ymm6,ymm6,ymm8
vpsrld ymm8,ymm5,25
vpslld ymm5,ymm5,32-25
vpxor ymm5,ymm5,ymm8
vpsrld ymm8,ymm4,25
vpslld ymm4,ymm4,32-25
vpxor ymm4,ymm4,ymm8
; Reload row C of state 0 and undo the diagonal rotation (B/C/D by 12/8/4).
vmovdqa ymm8,YMMWORD[((160+128))+rbp]
vpalignr ymm7,ymm7,ymm7,12
vpalignr ymm11,ymm11,ymm11,8
vpalignr ymm15,ymm15,ymm15,4
vpalignr ymm6,ymm6,ymm6,12
vpalignr ymm10,ymm10,ymm10,8
vpalignr ymm14,ymm14,ymm14,4
vpalignr ymm5,ymm5,ymm5,12
vpalignr ymm9,ymm9,ymm9,8
; Hash block 2: multiply by the high quadword of the value at [rbp+160].
mov rdx,QWORD[((8+160+0))+rbp]
mulx rax,r10,r10
add r14,r10
mulx r9,r11,r11
adc r15,r11
adc r9,0
imul rdx,r12
vpalignr ymm13,ymm13,ymm13,4
vpalignr ymm4,ymm4,ymm4,12
vpalignr ymm8,ymm8,ymm8,8
vpalignr ymm12,ymm12,ymm12,4
; Hash block 2: finish the product and reduce it as for block 1.
add r15,rax
adc r9,rdx
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
; Step past the two blocks absorbed by this iteration.
lea rdi,[32+rdi]
; Loop control: while rcx stays positive, take the variant that absorbs a
; third block; after that, repeat this body while r8 stays non-negative.
dec rcx
jg NEAR $L$seal_avx2_tail_512_rounds_and_3xhash
dec r8
jge NEAR $L$seal_avx2_tail_512_rounds_and_2xhash
; Rounds done: add the initial state back into each of the four states
; (constants, the B and C rows from the frame, and each state's own counter
; row as saved by the setup code).
vpaddd ymm3,ymm3,YMMWORD[$L$chacha20_consts]
vpaddd ymm7,ymm7,YMMWORD[((160+64))+rbp]
vpaddd ymm11,ymm11,YMMWORD[((160+96))+rbp]
vpaddd ymm15,ymm15,YMMWORD[((160+256))+rbp]
vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts]
vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp]
vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp]
vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp]
vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts]
vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp]
vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp]
vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp]
vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts]
vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp]
vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp]
vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp]
; Park ymm0 so it can serve as a temporary. vperm2i128 with 0x02 pairs the
; low 128-bit lanes of its two sources and 0x13 pairs the high lanes, which
; turns the row-major registers into consecutive 32-byte keystream chunks.
vmovdqa YMMWORD[(160+128)+rbp],ymm0
vperm2i128 ymm0,ymm7,ymm3,0x02
vperm2i128 ymm7,ymm7,ymm3,0x13
vperm2i128 ymm3,ymm15,ymm11,0x02
vperm2i128 ymm11,ymm15,ymm11,0x13
; Bytes 0..127: XOR with the input at rsi and store at rdi.
vpxor ymm0,ymm0,YMMWORD[((0+0))+rsi]
vpxor ymm3,ymm3,YMMWORD[((32+0))+rsi]
vpxor ymm7,ymm7,YMMWORD[((64+0))+rsi]
vpxor ymm11,ymm11,YMMWORD[((96+0))+rsi]
vmovdqu YMMWORD[(0+0)+rdi],ymm0
vmovdqu YMMWORD[(32+0)+rdi],ymm3
vmovdqu YMMWORD[(64+0)+rdi],ymm7
vmovdqu YMMWORD[(96+0)+rdi],ymm11
; Restore ymm0, then handle bytes 128..255 from the ymm2/6/10/14 state.
vmovdqa ymm0,YMMWORD[((160+128))+rbp]
vperm2i128 ymm3,ymm6,ymm2,0x02
vperm2i128 ymm6,ymm6,ymm2,0x13
vperm2i128 ymm2,ymm14,ymm10,0x02
vperm2i128 ymm10,ymm14,ymm10,0x13
vpxor ymm3,ymm3,YMMWORD[((0+128))+rsi]
vpxor ymm2,ymm2,YMMWORD[((32+128))+rsi]
vpxor ymm6,ymm6,YMMWORD[((64+128))+rsi]
vpxor ymm10,ymm10,YMMWORD[((96+128))+rsi]
vmovdqu YMMWORD[(0+128)+rdi],ymm3
vmovdqu YMMWORD[(32+128)+rdi],ymm2
vmovdqu YMMWORD[(64+128)+rdi],ymm6
vmovdqu YMMWORD[(96+128)+rdi],ymm10
; Bytes 256..383 from the ymm1/5/9/13 state.
vperm2i128 ymm3,ymm5,ymm1,0x02
vperm2i128 ymm5,ymm5,ymm1,0x13
vperm2i128 ymm1,ymm13,ymm9,0x02
vperm2i128 ymm9,ymm13,ymm9,0x13
vpxor ymm3,ymm3,YMMWORD[((0+256))+rsi]
vpxor ymm1,ymm1,YMMWORD[((32+256))+rsi]
vpxor ymm5,ymm5,YMMWORD[((64+256))+rsi]
vpxor ymm9,ymm9,YMMWORD[((96+256))+rsi]
vmovdqu YMMWORD[(0+256)+rdi],ymm3
vmovdqu YMMWORD[(32+256)+rdi],ymm1
vmovdqu YMMWORD[(64+256)+rdi],ymm5
vmovdqu YMMWORD[(96+256)+rdi],ymm9
; The last state is not consumed here: leave its four 32-byte chunks in
; ymm0, ymm4, ymm8, ymm12 (in output order) for the short loop.
vperm2i128 ymm3,ymm4,ymm0,0x13
vperm2i128 ymm0,ymm4,ymm0,0x02
vperm2i128 ymm4,ymm12,ymm8,0x02
vperm2i128 ymm12,ymm12,ymm8,0x13
vmovdqa ymm8,ymm3
; rcx = 384 bytes just written at rdi that the hash loop at the jump target
; will absorb; advance the input pointer and shrink the remaining length.
mov rcx,12*32
lea rsi,[384+rsi]
sub rbx,12*32
jmp NEAR $L$seal_avx2_short_hash_remainder
; Set up three interleaved ChaCha states from the rows already in
; ymm0 / ymm4 / ymm8 / ymm12 (A / B / C / D), for the 320-byte short path.
; NOTE(review): the caller that loads those four registers is outside this
; chunk; confirm their contents there.
$L$seal_avx2_320:
; Row A for states 1 and 2.
vmovdqa ymm2,ymm0
vmovdqa ymm1,ymm0
; Row B: ymm7 keeps an untouched copy for the feed-forward after the rounds.
vmovdqa ymm7,ymm4
vmovdqa ymm6,ymm4
vmovdqa ymm5,ymm4
; Row C: ymm11 keeps an untouched copy for the feed-forward.
vmovdqa ymm11,ymm8
vmovdqa ymm10,ymm8
vmovdqa ymm9,ymm8
; Row D: each state's counter row is $L$avx2_inc past the previous one; all
; three are saved in the frame for the feed-forward.
vmovdqa YMMWORD[(160+160)+rbp],ymm12
vpaddd ymm13,ymm12,YMMWORD[$L$avx2_inc]
vmovdqa YMMWORD[(160+192)+rbp],ymm13
vpaddd ymm14,ymm13,YMMWORD[$L$avx2_inc]
vmovdqa YMMWORD[(160+224)+rbp],ymm14
; r10 counts the 10 double rounds of the loop that follows.
mov r10,10
; Ten ChaCha double rounds (r10 counts down from 10) over three interleaved
; states: (ymm0,4,8,12), (ymm1,5,9,13), (ymm2,6,10,14). ymm3 is scratch for
; the shift-based rotations. Afterwards the initial state is added back, the
; first 32 bytes of keystream are clamped and stored at [rbp+160], and the
; remaining keystream is queued in registers for $L$seal_avx2_short.
$L$seal_avx2_320_rounds:
; --- first half round, state 0: a+=b; d^=a; d<<<=16; c+=d; b^=c; b<<<=12;
;     a+=b; d^=a; d<<<=8; c+=d; b^=c; b<<<=7 ---
vpaddd ymm0,ymm0,ymm4
vpxor ymm12,ymm12,ymm0
vpshufb ymm12,ymm12,YMMWORD[$L$rol16]
vpaddd ymm8,ymm8,ymm12
vpxor ymm4,ymm4,ymm8
vpsrld ymm3,ymm4,20
vpslld ymm4,ymm4,12
vpxor ymm4,ymm4,ymm3
vpaddd ymm0,ymm0,ymm4
vpxor ymm12,ymm12,ymm0
vpshufb ymm12,ymm12,YMMWORD[$L$rol8]
vpaddd ymm8,ymm8,ymm12
vpxor ymm4,ymm4,ymm8
vpslld ymm3,ymm4,7
vpsrld ymm4,ymm4,25
vpxor ymm4,ymm4,ymm3
; Rotate rows D/C/B by 12/8/4 bytes per lane to line up the diagonals.
vpalignr ymm12,ymm12,ymm12,12
vpalignr ymm8,ymm8,ymm8,8
vpalignr ymm4,ymm4,ymm4,4
; --- first half round, state 1 ---
vpaddd ymm1,ymm1,ymm5
vpxor ymm13,ymm13,ymm1
vpshufb ymm13,ymm13,YMMWORD[$L$rol16]
vpaddd ymm9,ymm9,ymm13
vpxor ymm5,ymm5,ymm9
vpsrld ymm3,ymm5,20
vpslld ymm5,ymm5,12
vpxor ymm5,ymm5,ymm3
vpaddd ymm1,ymm1,ymm5
vpxor ymm13,ymm13,ymm1
vpshufb ymm13,ymm13,YMMWORD[$L$rol8]
vpaddd ymm9,ymm9,ymm13
vpxor ymm5,ymm5,ymm9
vpslld ymm3,ymm5,7
vpsrld ymm5,ymm5,25
vpxor ymm5,ymm5,ymm3
vpalignr ymm13,ymm13,ymm13,12
vpalignr ymm9,ymm9,ymm9,8
vpalignr ymm5,ymm5,ymm5,4
; --- first half round, state 2 ---
vpaddd ymm2,ymm2,ymm6
vpxor ymm14,ymm14,ymm2
vpshufb ymm14,ymm14,YMMWORD[$L$rol16]
vpaddd ymm10,ymm10,ymm14
vpxor ymm6,ymm6,ymm10
vpsrld ymm3,ymm6,20
vpslld ymm6,ymm6,12
vpxor ymm6,ymm6,ymm3
vpaddd ymm2,ymm2,ymm6
vpxor ymm14,ymm14,ymm2
vpshufb ymm14,ymm14,YMMWORD[$L$rol8]
vpaddd ymm10,ymm10,ymm14
vpxor ymm6,ymm6,ymm10
vpslld ymm3,ymm6,7
vpsrld ymm6,ymm6,25
vpxor ymm6,ymm6,ymm3
vpalignr ymm14,ymm14,ymm14,12
vpalignr ymm10,ymm10,ymm10,8
vpalignr ymm6,ymm6,ymm6,4
; --- second half round, state 0 (same steps on the rotated rows) ---
vpaddd ymm0,ymm0,ymm4
vpxor ymm12,ymm12,ymm0
vpshufb ymm12,ymm12,YMMWORD[$L$rol16]
vpaddd ymm8,ymm8,ymm12
vpxor ymm4,ymm4,ymm8
vpsrld ymm3,ymm4,20
vpslld ymm4,ymm4,12
vpxor ymm4,ymm4,ymm3
vpaddd ymm0,ymm0,ymm4
vpxor ymm12,ymm12,ymm0
vpshufb ymm12,ymm12,YMMWORD[$L$rol8]
vpaddd ymm8,ymm8,ymm12
vpxor ymm4,ymm4,ymm8
vpslld ymm3,ymm4,7
vpsrld ymm4,ymm4,25
vpxor ymm4,ymm4,ymm3
; Undo the diagonal rotation (D/C/B by 4/8/12 bytes).
vpalignr ymm12,ymm12,ymm12,4
vpalignr ymm8,ymm8,ymm8,8
vpalignr ymm4,ymm4,ymm4,12
; --- second half round, state 1 ---
vpaddd ymm1,ymm1,ymm5
vpxor ymm13,ymm13,ymm1
vpshufb ymm13,ymm13,YMMWORD[$L$rol16]
vpaddd ymm9,ymm9,ymm13
vpxor ymm5,ymm5,ymm9
vpsrld ymm3,ymm5,20
vpslld ymm5,ymm5,12
vpxor ymm5,ymm5,ymm3
vpaddd ymm1,ymm1,ymm5
vpxor ymm13,ymm13,ymm1
vpshufb ymm13,ymm13,YMMWORD[$L$rol8]
vpaddd ymm9,ymm9,ymm13
vpxor ymm5,ymm5,ymm9
vpslld ymm3,ymm5,7
vpsrld ymm5,ymm5,25
vpxor ymm5,ymm5,ymm3
vpalignr ymm13,ymm13,ymm13,4
vpalignr ymm9,ymm9,ymm9,8
vpalignr ymm5,ymm5,ymm5,12
; --- second half round, state 2 ---
vpaddd ymm2,ymm2,ymm6
vpxor ymm14,ymm14,ymm2
vpshufb ymm14,ymm14,YMMWORD[$L$rol16]
vpaddd ymm10,ymm10,ymm14
vpxor ymm6,ymm6,ymm10
vpsrld ymm3,ymm6,20
vpslld ymm6,ymm6,12
vpxor ymm6,ymm6,ymm3
vpaddd ymm2,ymm2,ymm6
vpxor ymm14,ymm14,ymm2
vpshufb ymm14,ymm14,YMMWORD[$L$rol8]
vpaddd ymm10,ymm10,ymm14
vpxor ymm6,ymm6,ymm10
vpslld ymm3,ymm6,7
vpsrld ymm6,ymm6,25
vpxor ymm6,ymm6,ymm3
vpalignr ymm14,ymm14,ymm14,4
vpalignr ymm10,ymm10,ymm10,8
vpalignr ymm6,ymm6,ymm6,12
dec r10
jne NEAR $L$seal_avx2_320_rounds
; Feed-forward: add the initial rows back. Row A is the constant table,
; rows B and C come from the untouched copies in ymm7 / ymm11, and each
; state's counter row comes from its frame slot.
vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts]
vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts]
vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts]
vpaddd ymm4,ymm4,ymm7
vpaddd ymm5,ymm5,ymm7
vpaddd ymm6,ymm6,ymm7
vpaddd ymm8,ymm8,ymm11
vpaddd ymm9,ymm9,ymm11
vpaddd ymm10,ymm10,ymm11
vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp]
vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp]
vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp]
; Low lanes of rows A and B of state 0 form the first 32 keystream bytes;
; mask them with $L$clamp and store them at [rbp+160], the slot the hashing
; code reads its multiplier from.
vperm2i128 ymm3,ymm4,ymm0,0x02
vpand ymm3,ymm3,YMMWORD[$L$clamp]
vmovdqa YMMWORD[(160+0)+rbp],ymm3
; Interleave lanes (0x02 = low lanes, 0x13 = high lanes) so the remaining
; keystream sits, in output order, in ymm0, ymm4, ymm8, ymm12, ymm1, ymm5,
; ymm9, ymm13, ymm2, ymm6, which is the order the short loop drains them.
; Each permute overwrites a register that was already consumed.
vperm2i128 ymm0,ymm4,ymm0,0x13
vperm2i128 ymm4,ymm12,ymm8,0x13
vperm2i128 ymm8,ymm5,ymm1,0x02
vperm2i128 ymm12,ymm13,ymm9,0x02
vperm2i128 ymm1,ymm5,ymm1,0x13
vperm2i128 ymm5,ymm13,ymm9,0x13
vperm2i128 ymm9,ymm6,ymm2,0x02
vperm2i128 ymm13,ymm14,ymm10,0x02
vperm2i128 ymm2,ymm6,ymm2,0x13
vperm2i128 ymm6,ymm14,ymm10,0x13
jmp NEAR $L$seal_avx2_short
; Set up two interleaved ChaCha states from the rows already in
; ymm0 / ymm4 / ymm8 / ymm12 (A / B / C / D), for the 192-byte short path.
; NOTE(review): the caller that loads those four registers is outside this
; chunk; confirm their contents there.
$L$seal_avx2_192:
; Row A: working copy for state 1; ymm2 stays untouched for the feed-forward.
vmovdqa ymm2,ymm0
vmovdqa ymm1,ymm0
; Row B: working copy for state 1; ymm6 stays untouched.
vmovdqa ymm6,ymm4
vmovdqa ymm5,ymm4
; Row C: working copy for state 1; ymm10 stays untouched.
vmovdqa ymm10,ymm8
vmovdqa ymm9,ymm8
; Row D: state 1's counter row is $L$avx2_inc past state 0's. The initial
; counter rows are kept in ymm11 / ymm15 for the feed-forward.
vmovdqa ymm11,ymm12
vpaddd ymm13,ymm12,YMMWORD[$L$avx2_inc]
vmovdqa ymm15,ymm13
; r10 counts the 10 double rounds of the loop that follows.
mov r10,10
; Ten ChaCha double rounds (r10 counts down from 10) over two interleaved
; states: (ymm0,4,8,12) and (ymm1,5,9,13). ymm3 is scratch for the
; shift-based rotations. Afterwards the initial state is added back, the
; first 32 bytes of keystream are clamped and stored at [rbp+160], and
; execution falls through into $L$seal_avx2_short.
$L$seal_avx2_192_rounds:
; --- first half round, state 0: a+=b; d^=a; d<<<=16; c+=d; b^=c; b<<<=12;
;     a+=b; d^=a; d<<<=8; c+=d; b^=c; b<<<=7 ---
vpaddd ymm0,ymm0,ymm4
vpxor ymm12,ymm12,ymm0
vpshufb ymm12,ymm12,YMMWORD[$L$rol16]
vpaddd ymm8,ymm8,ymm12
vpxor ymm4,ymm4,ymm8
vpsrld ymm3,ymm4,20
vpslld ymm4,ymm4,12
vpxor ymm4,ymm4,ymm3
vpaddd ymm0,ymm0,ymm4
vpxor ymm12,ymm12,ymm0
vpshufb ymm12,ymm12,YMMWORD[$L$rol8]
vpaddd ymm8,ymm8,ymm12
vpxor ymm4,ymm4,ymm8
vpslld ymm3,ymm4,7
vpsrld ymm4,ymm4,25
vpxor ymm4,ymm4,ymm3
; Rotate rows D/C/B by 12/8/4 bytes per lane to line up the diagonals.
vpalignr ymm12,ymm12,ymm12,12
vpalignr ymm8,ymm8,ymm8,8
vpalignr ymm4,ymm4,ymm4,4
; --- first half round, state 1 ---
vpaddd ymm1,ymm1,ymm5
vpxor ymm13,ymm13,ymm1
vpshufb ymm13,ymm13,YMMWORD[$L$rol16]
vpaddd ymm9,ymm9,ymm13
vpxor ymm5,ymm5,ymm9
vpsrld ymm3,ymm5,20
vpslld ymm5,ymm5,12
vpxor ymm5,ymm5,ymm3
vpaddd ymm1,ymm1,ymm5
vpxor ymm13,ymm13,ymm1
vpshufb ymm13,ymm13,YMMWORD[$L$rol8]
vpaddd ymm9,ymm9,ymm13
vpxor ymm5,ymm5,ymm9
vpslld ymm3,ymm5,7
vpsrld ymm5,ymm5,25
vpxor ymm5,ymm5,ymm3
vpalignr ymm13,ymm13,ymm13,12
vpalignr ymm9,ymm9,ymm9,8
vpalignr ymm5,ymm5,ymm5,4
; --- second half round, state 0 (same steps on the rotated rows) ---
vpaddd ymm0,ymm0,ymm4
vpxor ymm12,ymm12,ymm0
vpshufb ymm12,ymm12,YMMWORD[$L$rol16]
vpaddd ymm8,ymm8,ymm12
vpxor ymm4,ymm4,ymm8
vpsrld ymm3,ymm4,20
vpslld ymm4,ymm4,12
vpxor ymm4,ymm4,ymm3
vpaddd ymm0,ymm0,ymm4
vpxor ymm12,ymm12,ymm0
vpshufb ymm12,ymm12,YMMWORD[$L$rol8]
vpaddd ymm8,ymm8,ymm12
vpxor ymm4,ymm4,ymm8
vpslld ymm3,ymm4,7
vpsrld ymm4,ymm4,25
vpxor ymm4,ymm4,ymm3
; Undo the diagonal rotation (D/C/B by 4/8/12 bytes).
vpalignr ymm12,ymm12,ymm12,4
vpalignr ymm8,ymm8,ymm8,8
vpalignr ymm4,ymm4,ymm4,12
; --- second half round, state 1 ---
vpaddd ymm1,ymm1,ymm5
vpxor ymm13,ymm13,ymm1
vpshufb ymm13,ymm13,YMMWORD[$L$rol16]
vpaddd ymm9,ymm9,ymm13
vpxor ymm5,ymm5,ymm9
vpsrld ymm3,ymm5,20
vpslld ymm5,ymm5,12
vpxor ymm5,ymm5,ymm3
vpaddd ymm1,ymm1,ymm5
vpxor ymm13,ymm13,ymm1
vpshufb ymm13,ymm13,YMMWORD[$L$rol8]
vpaddd ymm9,ymm9,ymm13
vpxor ymm5,ymm5,ymm9
vpslld ymm3,ymm5,7
vpsrld ymm5,ymm5,25
vpxor ymm5,ymm5,ymm3
vpalignr ymm13,ymm13,ymm13,4
vpalignr ymm9,ymm9,ymm9,8
vpalignr ymm5,ymm5,ymm5,12
dec r10
jne NEAR $L$seal_avx2_192_rounds
; Feed-forward: add the initial rows back. ymm2 / ymm6 / ymm10 still hold
; the untouched A / B / C rows, ymm11 / ymm15 the two initial counter rows.
vpaddd ymm0,ymm0,ymm2
vpaddd ymm1,ymm1,ymm2
vpaddd ymm4,ymm4,ymm6
vpaddd ymm5,ymm5,ymm6
vpaddd ymm8,ymm8,ymm10
vpaddd ymm9,ymm9,ymm10
vpaddd ymm12,ymm12,ymm11
vpaddd ymm13,ymm13,ymm15
; Low lanes of rows A and B of state 0 form the first 32 keystream bytes;
; mask them with $L$clamp and store them at [rbp+160], the slot the hashing
; code reads its multiplier from.
vperm2i128 ymm3,ymm4,ymm0,0x02
vpand ymm3,ymm3,YMMWORD[$L$clamp]
vmovdqa YMMWORD[(160+0)+rbp],ymm3
; Interleave lanes (0x02 = low lanes, 0x13 = high lanes) so the remaining
; keystream sits, in output order, in ymm0, ymm4, ymm8, ymm12, ymm1, ymm5.
vperm2i128 ymm0,ymm4,ymm0,0x13
vperm2i128 ymm4,ymm12,ymm8,0x13
vperm2i128 ymm8,ymm5,ymm1,0x02
vperm2i128 ymm12,ymm13,ymm9,0x02
vperm2i128 ymm1,ymm5,ymm1,0x13
vperm2i128 ymm5,ymm13,ymm9,0x13
; Entry to the short path: run poly_hash_ad_internal, then start with no
; pending bytes to hash (rcx = 0).
$L$seal_avx2_short:
; No-op as written: source and destination are the same register.
; NOTE(review): presumably a generator artifact where the length argument
; expected by poly_hash_ad_internal is already in r8; confirm in the Perl
; source.
mov r8,r8
; Defined elsewhere in this file. NOTE(review): presumably initialises the
; accumulator r12:r11:r10 and absorbs the additional data; confirm.
call poly_hash_ad_internal
xor rcx,rcx
; Absorb rcx bytes at rdi into the accumulator r12:r11:r10, 16 bytes per
; iteration, advancing rdi as it goes. Leaves for $L$seal_avx2_short_loop
; once fewer than 16 bytes remain in rcx.
$L$seal_avx2_short_hash_remainder:
cmp rcx,16
jb NEAR $L$seal_avx2_short_loop
; acc += 16 bytes at [rdi], plus 1 in the third limb (i.e. bit 128).
add r10,QWORD[((0+0))+rdi]
adc r11,QWORD[((8+0))+rdi]
adc r12,1
; acc *= the 128-bit value at [rbp+160], schoolbook style with 64x64->128
; mul; the top accumulator limb (r12) only needs the low half, hence imul.
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
imul r9,r12
add r15,r10
adc r9,rdx
; The product is in r9:r15:r14:r13. Keep the low 130 bits in r12:r11:r10
; and fold the rest back in as 4*c + c, c = (r9:r15) >> 2 (partial
; reduction modulo 2^130 - 5).
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
; One block consumed.
sub rcx,16
add rdi,16
jmp NEAR $L$seal_avx2_short_hash_remainder
; While at least 32 bytes remain (rbx): XOR 32 input bytes at rsi with the
; keystream in ymm0, store the result at rdi, absorb the two 16-byte blocks
; just written into the accumulator r12:r11:r10, and shift the queued
; keystream registers down by one position.
$L$seal_avx2_short_loop:
cmp rbx,32
jb NEAR $L$seal_avx2_short_tail
sub rbx,32
vpxor ymm0,ymm0,YMMWORD[rsi]
vmovdqu YMMWORD[rdi],ymm0
lea rsi,[32+rsi]
; First block: acc += 16 bytes at [rdi], plus bit 128.
add r10,QWORD[((0+0))+rdi]
adc r11,QWORD[((8+0))+rdi]
adc r12,1
; acc *= the 128-bit value at [rbp+160].
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
imul r9,r12
add r15,r10
adc r9,rdx
; Keep the low 130 bits of r9:r15:r14:r13 and fold the rest back in as
; 4*c + c, c = (r9:r15) >> 2 (partial reduction modulo 2^130 - 5).
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
; Second block: same add / multiply / reduce on the bytes at [rdi+16].
add r10,QWORD[((0+16))+rdi]
adc r11,QWORD[((8+16))+rdi]
adc r12,1
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
imul r9,r12
add r15,r10
adc r9,rdx
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
lea rdi,[32+rdi]
; Shift the keystream queue: ymm0 <- ymm4 <- ymm8 <- ymm12 <- ymm1 <- ymm5
; <- ymm9 <- ymm13 <- ymm2 <- ymm6. Each destination was read or copied
; earlier in the sequence, so the order below must be preserved.
vmovdqa ymm0,ymm4
vmovdqa ymm4,ymm8
vmovdqa ymm8,ymm12
vmovdqa ymm12,ymm1
vmovdqa ymm1,ymm5
vmovdqa ymm5,ymm9
vmovdqa ymm9,ymm13
vmovdqa ymm13,ymm2
vmovdqa ymm2,ymm6
jmp NEAR $L$seal_avx2_short_loop
; Fewer than 32 bytes remain. If at least 16 remain, encrypt one 16-byte
; block with the low lane of ymm0, absorb it into the accumulator
; r12:r11:r10, and move the high lane of ymm0 down into xmm0 so the next
; keystream bytes are in xmm0 when execution reaches $L$seal_avx2_exit.
$L$seal_avx2_short_tail:
cmp rbx,16
jb NEAR $L$seal_avx2_exit
sub rbx,16
; xmm3 receives the result, so ymm0 (including its high lane) is preserved.
vpxor xmm3,xmm0,XMMWORD[rsi]
vmovdqu XMMWORD[rdi],xmm3
lea rsi,[16+rsi]
; acc += the 16 bytes just written at [rdi], plus bit 128.
add r10,QWORD[((0+0))+rdi]
adc r11,QWORD[((8+0))+rdi]
adc r12,1
; acc *= the 128-bit value at [rbp+160].
mov rax,QWORD[((0+160+0))+rbp]
mov r15,rax
mul r10
mov r13,rax
mov r14,rdx
mov rax,QWORD[((0+160+0))+rbp]
mul r11
imul r15,r12
add r14,rax
adc r15,rdx
mov rax,QWORD[((8+160+0))+rbp]
mov r9,rax
mul r10
add r14,rax
adc rdx,0
mov r10,rdx
mov rax,QWORD[((8+160+0))+rbp]
mul r11
add r15,rax
adc rdx,0
imul r9,r12
add r15,r10
adc r9,rdx
; Keep the low 130 bits of r9:r15:r14:r13 and fold the rest back in as
; 4*c + c, c = (r9:r15) >> 2 (partial reduction modulo 2^130 - 5).
mov r10,r13
mov r11,r14
mov r12,r15
and r12,3
mov r13,r15
and r13,-4
mov r14,r9
shrd r15,r9,2
shr r9,2
add r15,r13
adc r9,r14
add r10,r15
adc r11,r9
adc r12,0
lea rdi,[16+rdi]
; The low lane is used up; bring the high 128 bits of ymm0 into xmm0.
vextracti128 xmm0,ymm0,1
; Leave the AVX2 path: clear the upper halves of the ymm registers, then
; continue at $L$seal_sse_tail_16 (defined elsewhere in this file).
; NOTE(review): that target presumably handles the final sub-16-byte tail
; using xmm0 as keystream; confirm there.
$L$seal_avx2_exit:
vzeroupper
jmp NEAR $L$seal_sse_tail_16
%else
; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738
ret
%endif