// blob: a261463980f0ebdafd50b6353fa5d969d7bc9807 [file] [log] [blame] [edit]
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#include <openssl/asm_base.h>
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
// Read-only constant pool shared by the routines in this file.
// The pool starts on a 64-byte boundary (.p2align 6).
.section __DATA,__const
.p2align 6
chacha20_poly1305_constants:
// The 16-byte ASCII text: expand 32-byte k, stored twice (32 bytes total).
// The SSE code in this file loads the first 16 bytes as state row 0.
L$chacha20_consts:
.byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
.byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
// pshufb control mask, stored twice: within every 32-bit lane, result byte i
// takes source byte (i + 3) mod 4, i.e. each lane is rotated left by 8 bits.
L$rol8:
.byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
.byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
// pshufb control mask, stored twice: swaps the two 16-bit halves of every
// 32-bit lane, i.e. each lane is rotated left by 16 bits.
L$rol16:
.byte 2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
.byte 2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
// Four zero dwords. Not referenced in the visible part of this file;
// presumably used by AVX2 code elsewhere (TODO confirm). Note that a 32-byte
// read from this label also covers L$sse_inc, which follows immediately.
L$avx2_init:
.long 0,0,0,0
// Added with paddd to a counter row: increments the lowest 32-bit lane by 1.
L$sse_inc:
.long 1,0,0,0
// Same idea as L$sse_inc but +2 in the lowest lane of each 128-bit half.
// Not referenced in the visible part of this file (presumably AVX2 paths).
L$avx2_inc:
.long 2,0,0,0,2,0,0,0
// First 16 bytes: mask and-ed (pand) into the freshly generated Poly1305
// multiplier before it is stored at 0(%rbp). Second 16 bytes are all ones;
// presumably they let a 32-byte AND leave the following 16 bytes untouched
// (TODO confirm against the AVX2 code).
L$clamp:
.quad 0x0FFFFFFC0FFFFFFF, 0x0FFFFFFC0FFFFFFC
.quad 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF
.p2align 4
// Table of sixteen 16-byte masks; entry k (0-based) has its low k+1 bytes
// set to 0xff and the rest zero. Not referenced in the visible part of this
// file; presumably used to keep only the valid bytes of a partial block.
L$and_masks:
.byte 0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
.text
.p2align 6
// poly_hash_ad_internal
//
// File-local helper (no .globl) that absorbs a byte string into a Poly1305
// accumulator. It uses a private register convention, not the C ABI.
//
// In:   %rcx = pointer to the bytes to hash
//       %r8  = number of bytes
//       0(%rbp), 8(%rbp) = low and high 64-bit words of the multiplier r
// Out:  %r10 (low), %r11, %r12 (high) = accumulator, partially reduced
//       modulo 2^130 - 5. The accumulator is zeroed on entry, so previous
//       contents of these registers are discarded.
// Modifies: %rax, %rdx, %r9, %r13, %r14, %r15, flags; the loop paths also
//       advance %rcx and count %r8 down.
//
// Every full 16-byte block is added with an extra 2^128 bit (adcq $1 into
// the top limb) and then multiplied by r. A trailing partial block is
// zero-extended to 16 bytes and treated the same way.
poly_hash_ad_internal:
// acc = 0
xorq %r10,%r10
xorq %r11,%r11
xorq %r12,%r12
// Lengths other than 13 take the generic loop.
cmpq $13,%r8
jne L$hash_ad_loop
// Fast path for exactly 13 bytes (the label name suggests the TLS AD size).
L$poly_fast_tls_ad:
// Two overlapping 8-byte loads cover bytes 0..12 without reading past the
// end: bytes 0..7 go to %r10; bytes 5..12 are loaded and shifted right by
// 24 bits so that bytes 8..12 land in the low 40 bits of %r11.
movq (%rcx),%r10
movq 5(%rcx),%r11
shrq $24,%r11
// Top limb = 1, i.e. the 2^128 bit (accumulator was zero, so a plain move
// is equivalent to an add).
movq $1,%r12
// ---- acc *= r: schoolbook multiply into %r13 (low), %r14, %r15, %r9 ----
// r0 * acc0 -> %r14:%r13; keep a copy of r0 in %r15.
movq 0+0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
// r0 * acc1 is added at limb 1; %r15 = r0 * acc2 (low 64 bits) plus the
// high half and carry.
movq 0+0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
// r1 * acc0 is added at limb 1; its carry-adjusted high half is parked in
// %r10, which is free now that acc0 has been fully consumed.
movq 8+0+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
// r1 * acc1 is added at limb 2; %r9 = r1 * acc2 (low 64 bits) plus the
// remaining high halves and carries.
movq 8+0+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
// ---- reduce modulo 2^130 - 5 ----
// Low 130 bits stay in %r10, %r11 and the two low bits of %r12.
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
// Let c = product >> 130. %r14:%r13 = 4*c (limb 2 with its two low bits
// cleared, and limb 3); %r9:%r15 = c (limbs 3:2 shifted right by 2).
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
// %r9:%r15 = 4*c + c = 5*c, then fold it back into the low limbs.
addq %r13,%r15
adcq %r14,%r9
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
ret
// Generic path: consume full 16-byte blocks while at least 16 bytes remain.
L$hash_ad_loop:
cmpq $16,%r8
jb L$hash_ad_tail
// acc += block + 2^128
addq 0+0(%rcx),%r10
adcq 8+0(%rcx),%r11
adcq $1,%r12
// acc *= r, then reduce (same instruction sequence as in the fast path).
movq 0+0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r15
adcq %r14,%r9
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
// Advance to the next block.
leaq 16(%rcx),%rcx
subq $16,%r8
jmp L$hash_ad_loop
// Fewer than 16 bytes remain.
L$hash_ad_tail:
cmpq $0,%r8
je L$hash_ad_done
// Build the partial block in %r14:%r13, zero-extended to 16 bytes.
xorq %r13,%r13
xorq %r14,%r14
xorq %r15,%r15
// Point one past the last byte and walk backwards, so the first input byte
// ends up in the least significant position.
addq %r8,%rcx
L$hash_ad_tail_loop:
// Shift the 128-bit value left by one byte and insert the next byte at the
// bottom.
shldq $8,%r13,%r14
shlq $8,%r13
movzbq -1(%rcx),%r15
xorq %r15,%r13
decq %rcx
decq %r8
jne L$hash_ad_tail_loop
// acc += padded block + 2^128
addq %r13,%r10
adcq %r14,%r11
adcq $1,%r12
// acc *= r, then reduce (same instruction sequence as in the fast path).
movq 0+0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r15
adcq %r14,%r9
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
L$hash_ad_done:
ret
.globl _chacha20_poly1305_open_sse41
.private_extern _chacha20_poly1305_open_sse41
.p2align 6
_chacha20_poly1305_open_sse41:
_CET_ENDBR
pushq %rbp
pushq %rbx
pushq %r12
pushq %r13
pushq %r14
pushq %r15
pushq %r9
subq $288 + 0 + 32,%rsp
leaq 32(%rsp),%rbp
andq $-32,%rbp
movq %rdx,%rbx
movq %r8,0+0+32(%rbp)
movq %rbx,8+0+32(%rbp)
cmpq $128,%rbx
jbe L$open_sse_128
movdqa L$chacha20_consts(%rip),%xmm0
movdqu 0(%r9),%xmm4
movdqu 16(%r9),%xmm8
movdqu 32(%r9),%xmm12
movdqa %xmm12,%xmm7
movdqa %xmm4,0+48(%rbp)
movdqa %xmm8,0+64(%rbp)
movdqa %xmm12,0+96(%rbp)
movq $10,%r10
L$open_sse_init_rounds:
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb L$rol16(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $12,%xmm3
psrld $20,%xmm4
pxor %xmm3,%xmm4
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb L$rol8(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $7,%xmm3
psrld $25,%xmm4
pxor %xmm3,%xmm4
.byte 102,15,58,15,228,4
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,12
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb L$rol16(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $12,%xmm3
psrld $20,%xmm4
pxor %xmm3,%xmm4
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb L$rol8(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $7,%xmm3
psrld $25,%xmm4
pxor %xmm3,%xmm4
.byte 102,15,58,15,228,12
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,4
decq %r10
jne L$open_sse_init_rounds
paddd L$chacha20_consts(%rip),%xmm0
paddd 0+48(%rbp),%xmm4
pand L$clamp(%rip),%xmm0
movdqa %xmm0,0+0(%rbp)
movdqa %xmm4,0+16(%rbp)
movq %r8,%r8
call poly_hash_ad_internal
L$open_sse_main_loop:
cmpq $256,%rbx
jb L$open_sse_tail
movdqa L$chacha20_consts(%rip),%xmm0
movdqa 0+48(%rbp),%xmm4
movdqa 0+64(%rbp),%xmm8
movdqa %xmm0,%xmm1
movdqa %xmm4,%xmm5
movdqa %xmm8,%xmm9
movdqa %xmm0,%xmm2
movdqa %xmm4,%xmm6
movdqa %xmm8,%xmm10
movdqa %xmm0,%xmm3
movdqa %xmm4,%xmm7
movdqa %xmm8,%xmm11
movdqa 0+96(%rbp),%xmm15
paddd L$sse_inc(%rip),%xmm15
movdqa %xmm15,%xmm14
paddd L$sse_inc(%rip),%xmm14
movdqa %xmm14,%xmm13
paddd L$sse_inc(%rip),%xmm13
movdqa %xmm13,%xmm12
paddd L$sse_inc(%rip),%xmm12
movdqa %xmm12,0+96(%rbp)
movdqa %xmm13,0+112(%rbp)
movdqa %xmm14,0+128(%rbp)
movdqa %xmm15,0+144(%rbp)
movq $4,%rcx
movq %rsi,%r8
L$open_sse_main_loop_rounds:
movdqa %xmm8,0+80(%rbp)
movdqa L$rol16(%rip),%xmm8
paddd %xmm7,%xmm3
paddd %xmm6,%xmm2
paddd %xmm5,%xmm1
paddd %xmm4,%xmm0
pxor %xmm3,%xmm15
pxor %xmm2,%xmm14
pxor %xmm1,%xmm13
pxor %xmm0,%xmm12
.byte 102,69,15,56,0,248
.byte 102,69,15,56,0,240
.byte 102,69,15,56,0,232
.byte 102,69,15,56,0,224
movdqa 0+80(%rbp),%xmm8
paddd %xmm15,%xmm11
paddd %xmm14,%xmm10
paddd %xmm13,%xmm9
paddd %xmm12,%xmm8
pxor %xmm11,%xmm7
addq 0+0(%r8),%r10
adcq 8+0(%r8),%r11
adcq $1,%r12
leaq 16(%r8),%r8
pxor %xmm10,%xmm6
pxor %xmm9,%xmm5
pxor %xmm8,%xmm4
movdqa %xmm8,0+80(%rbp)
movdqa %xmm7,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm7
pxor %xmm8,%xmm7
movdqa %xmm6,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm6
pxor %xmm8,%xmm6
movdqa %xmm5,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm5
pxor %xmm8,%xmm5
movdqa %xmm4,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm4
pxor %xmm8,%xmm4
movq 0+0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movdqa L$rol8(%rip),%xmm8
paddd %xmm7,%xmm3
paddd %xmm6,%xmm2
paddd %xmm5,%xmm1
paddd %xmm4,%xmm0
pxor %xmm3,%xmm15
pxor %xmm2,%xmm14
pxor %xmm1,%xmm13
pxor %xmm0,%xmm12
.byte 102,69,15,56,0,248
.byte 102,69,15,56,0,240
.byte 102,69,15,56,0,232
.byte 102,69,15,56,0,224
movdqa 0+80(%rbp),%xmm8
paddd %xmm15,%xmm11
paddd %xmm14,%xmm10
paddd %xmm13,%xmm9
paddd %xmm12,%xmm8
pxor %xmm11,%xmm7
pxor %xmm10,%xmm6
movq 8+0+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
pxor %xmm9,%xmm5
pxor %xmm8,%xmm4
movdqa %xmm8,0+80(%rbp)
movdqa %xmm7,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm7
pxor %xmm8,%xmm7
movdqa %xmm6,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm6
pxor %xmm8,%xmm6
movdqa %xmm5,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm5
pxor %xmm8,%xmm5
movdqa %xmm4,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm4
pxor %xmm8,%xmm4
movdqa 0+80(%rbp),%xmm8
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
.byte 102,15,58,15,255,4
.byte 102,69,15,58,15,219,8
.byte 102,69,15,58,15,255,12
.byte 102,15,58,15,246,4
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,12
.byte 102,15,58,15,237,4
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,12
.byte 102,15,58,15,228,4
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,12
movdqa %xmm8,0+80(%rbp)
movdqa L$rol16(%rip),%xmm8
paddd %xmm7,%xmm3
paddd %xmm6,%xmm2
paddd %xmm5,%xmm1
paddd %xmm4,%xmm0
pxor %xmm3,%xmm15
pxor %xmm2,%xmm14
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r15
adcq %r14,%r9
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
pxor %xmm1,%xmm13
pxor %xmm0,%xmm12
.byte 102,69,15,56,0,248
.byte 102,69,15,56,0,240
.byte 102,69,15,56,0,232
.byte 102,69,15,56,0,224
movdqa 0+80(%rbp),%xmm8
paddd %xmm15,%xmm11
paddd %xmm14,%xmm10
paddd %xmm13,%xmm9
paddd %xmm12,%xmm8
pxor %xmm11,%xmm7
pxor %xmm10,%xmm6
pxor %xmm9,%xmm5
pxor %xmm8,%xmm4
movdqa %xmm8,0+80(%rbp)
movdqa %xmm7,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm7
pxor %xmm8,%xmm7
movdqa %xmm6,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm6
pxor %xmm8,%xmm6
movdqa %xmm5,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm5
pxor %xmm8,%xmm5
movdqa %xmm4,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm4
pxor %xmm8,%xmm4
movdqa L$rol8(%rip),%xmm8
paddd %xmm7,%xmm3
paddd %xmm6,%xmm2
paddd %xmm5,%xmm1
paddd %xmm4,%xmm0
pxor %xmm3,%xmm15
pxor %xmm2,%xmm14
pxor %xmm1,%xmm13
pxor %xmm0,%xmm12
.byte 102,69,15,56,0,248
.byte 102,69,15,56,0,240
.byte 102,69,15,56,0,232
.byte 102,69,15,56,0,224
movdqa 0+80(%rbp),%xmm8
paddd %xmm15,%xmm11
paddd %xmm14,%xmm10
paddd %xmm13,%xmm9
paddd %xmm12,%xmm8
pxor %xmm11,%xmm7
pxor %xmm10,%xmm6
pxor %xmm9,%xmm5
pxor %xmm8,%xmm4
movdqa %xmm8,0+80(%rbp)
movdqa %xmm7,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm7
pxor %xmm8,%xmm7
movdqa %xmm6,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm6
pxor %xmm8,%xmm6
movdqa %xmm5,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm5
pxor %xmm8,%xmm5
movdqa %xmm4,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm4
pxor %xmm8,%xmm4
movdqa 0+80(%rbp),%xmm8
.byte 102,15,58,15,255,12
.byte 102,69,15,58,15,219,8
.byte 102,69,15,58,15,255,4
.byte 102,15,58,15,246,12
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,4
.byte 102,15,58,15,237,12
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,4
.byte 102,15,58,15,228,12
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,4
decq %rcx
jge L$open_sse_main_loop_rounds
addq 0+0(%r8),%r10
adcq 8+0(%r8),%r11
adcq $1,%r12
movq 0+0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r15
adcq %r14,%r9
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
leaq 16(%r8),%r8
cmpq $-6,%rcx
jg L$open_sse_main_loop_rounds
paddd L$chacha20_consts(%rip),%xmm3
paddd 0+48(%rbp),%xmm7
paddd 0+64(%rbp),%xmm11
paddd 0+144(%rbp),%xmm15
paddd L$chacha20_consts(%rip),%xmm2
paddd 0+48(%rbp),%xmm6
paddd 0+64(%rbp),%xmm10
paddd 0+128(%rbp),%xmm14
paddd L$chacha20_consts(%rip),%xmm1
paddd 0+48(%rbp),%xmm5
paddd 0+64(%rbp),%xmm9
paddd 0+112(%rbp),%xmm13
paddd L$chacha20_consts(%rip),%xmm0
paddd 0+48(%rbp),%xmm4
paddd 0+64(%rbp),%xmm8
paddd 0+96(%rbp),%xmm12
movdqa %xmm12,0+80(%rbp)
movdqu 0 + 0(%rsi),%xmm12
pxor %xmm3,%xmm12
movdqu %xmm12,0 + 0(%rdi)
movdqu 16 + 0(%rsi),%xmm12
pxor %xmm7,%xmm12
movdqu %xmm12,16 + 0(%rdi)
movdqu 32 + 0(%rsi),%xmm12
pxor %xmm11,%xmm12
movdqu %xmm12,32 + 0(%rdi)
movdqu 48 + 0(%rsi),%xmm12
pxor %xmm15,%xmm12
movdqu %xmm12,48 + 0(%rdi)
movdqu 0 + 64(%rsi),%xmm3
movdqu 16 + 64(%rsi),%xmm7
movdqu 32 + 64(%rsi),%xmm11
movdqu 48 + 64(%rsi),%xmm15
pxor %xmm3,%xmm2
pxor %xmm7,%xmm6
pxor %xmm11,%xmm10
pxor %xmm14,%xmm15
movdqu %xmm2,0 + 64(%rdi)
movdqu %xmm6,16 + 64(%rdi)
movdqu %xmm10,32 + 64(%rdi)
movdqu %xmm15,48 + 64(%rdi)
movdqu 0 + 128(%rsi),%xmm3
movdqu 16 + 128(%rsi),%xmm7
movdqu 32 + 128(%rsi),%xmm11
movdqu 48 + 128(%rsi),%xmm15
pxor %xmm3,%xmm1
pxor %xmm7,%xmm5
pxor %xmm11,%xmm9
pxor %xmm13,%xmm15
movdqu %xmm1,0 + 128(%rdi)
movdqu %xmm5,16 + 128(%rdi)
movdqu %xmm9,32 + 128(%rdi)
movdqu %xmm15,48 + 128(%rdi)
movdqu 0 + 192(%rsi),%xmm3
movdqu 16 + 192(%rsi),%xmm7
movdqu 32 + 192(%rsi),%xmm11
movdqu 48 + 192(%rsi),%xmm15
pxor %xmm3,%xmm0
pxor %xmm7,%xmm4
pxor %xmm11,%xmm8
pxor 0+80(%rbp),%xmm15
movdqu %xmm0,0 + 192(%rdi)
movdqu %xmm4,16 + 192(%rdi)
movdqu %xmm8,32 + 192(%rdi)
movdqu %xmm15,48 + 192(%rdi)
leaq 256(%rsi),%rsi
leaq 256(%rdi),%rdi
subq $256,%rbx
jmp L$open_sse_main_loop
L$open_sse_tail:
testq %rbx,%rbx
jz L$open_sse_finalize
cmpq $192,%rbx
ja L$open_sse_tail_256
cmpq $128,%rbx
ja L$open_sse_tail_192
cmpq $64,%rbx
ja L$open_sse_tail_128
movdqa L$chacha20_consts(%rip),%xmm0
movdqa 0+48(%rbp),%xmm4
movdqa 0+64(%rbp),%xmm8
movdqa 0+96(%rbp),%xmm12
paddd L$sse_inc(%rip),%xmm12
movdqa %xmm12,0+96(%rbp)
xorq %r8,%r8
movq %rbx,%rcx
cmpq $16,%rcx
jb L$open_sse_tail_64_rounds
L$open_sse_tail_64_rounds_and_x1hash:
addq 0+0(%rsi,%r8,1),%r10
adcq 8+0(%rsi,%r8,1),%r11
adcq $1,%r12
movq 0+0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r15
adcq %r14,%r9
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
subq $16,%rcx
L$open_sse_tail_64_rounds:
addq $16,%r8
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb L$rol16(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $12,%xmm3
psrld $20,%xmm4
pxor %xmm3,%xmm4
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb L$rol8(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $7,%xmm3
psrld $25,%xmm4
pxor %xmm3,%xmm4
.byte 102,15,58,15,228,4
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,12
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb L$rol16(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $12,%xmm3
psrld $20,%xmm4
pxor %xmm3,%xmm4
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb L$rol8(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $7,%xmm3
psrld $25,%xmm4
pxor %xmm3,%xmm4
.byte 102,15,58,15,228,12
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,4
cmpq $16,%rcx
jae L$open_sse_tail_64_rounds_and_x1hash
cmpq $160,%r8
jne L$open_sse_tail_64_rounds
paddd L$chacha20_consts(%rip),%xmm0
paddd 0+48(%rbp),%xmm4
paddd 0+64(%rbp),%xmm8
paddd 0+96(%rbp),%xmm12
jmp L$open_sse_tail_64_dec_loop
L$open_sse_tail_128:
movdqa L$chacha20_consts(%rip),%xmm0
movdqa 0+48(%rbp),%xmm4
movdqa 0+64(%rbp),%xmm8
movdqa %xmm0,%xmm1
movdqa %xmm4,%xmm5
movdqa %xmm8,%xmm9
movdqa 0+96(%rbp),%xmm13
paddd L$sse_inc(%rip),%xmm13
movdqa %xmm13,%xmm12
paddd L$sse_inc(%rip),%xmm12
movdqa %xmm12,0+96(%rbp)
movdqa %xmm13,0+112(%rbp)
movq %rbx,%rcx
andq $-16,%rcx
xorq %r8,%r8
L$open_sse_tail_128_rounds_and_x1hash:
addq 0+0(%rsi,%r8,1),%r10
adcq 8+0(%rsi,%r8,1),%r11
adcq $1,%r12
movq 0+0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r15
adcq %r14,%r9
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
L$open_sse_tail_128_rounds:
addq $16,%r8
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb L$rol16(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $12,%xmm3
psrld $20,%xmm4
pxor %xmm3,%xmm4
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb L$rol8(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $7,%xmm3
psrld $25,%xmm4
pxor %xmm3,%xmm4
.byte 102,15,58,15,228,4
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,12
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb L$rol16(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $12,%xmm3
psrld $20,%xmm5
pxor %xmm3,%xmm5
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb L$rol8(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $7,%xmm3
psrld $25,%xmm5
pxor %xmm3,%xmm5
.byte 102,15,58,15,237,4
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,12
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb L$rol16(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $12,%xmm3
psrld $20,%xmm4
pxor %xmm3,%xmm4
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb L$rol8(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $7,%xmm3
psrld $25,%xmm4
pxor %xmm3,%xmm4
.byte 102,15,58,15,228,12
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,4
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb L$rol16(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $12,%xmm3
psrld $20,%xmm5
pxor %xmm3,%xmm5
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb L$rol8(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $7,%xmm3
psrld $25,%xmm5
pxor %xmm3,%xmm5
.byte 102,15,58,15,237,12
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,4
cmpq %rcx,%r8
jb L$open_sse_tail_128_rounds_and_x1hash
cmpq $160,%r8
jne L$open_sse_tail_128_rounds
paddd L$chacha20_consts(%rip),%xmm1
paddd 0+48(%rbp),%xmm5
paddd 0+64(%rbp),%xmm9
paddd 0+112(%rbp),%xmm13
paddd L$chacha20_consts(%rip),%xmm0
paddd 0+48(%rbp),%xmm4
paddd 0+64(%rbp),%xmm8
paddd 0+96(%rbp),%xmm12
movdqu 0 + 0(%rsi),%xmm3
movdqu 16 + 0(%rsi),%xmm7
movdqu 32 + 0(%rsi),%xmm11
movdqu 48 + 0(%rsi),%xmm15
pxor %xmm3,%xmm1
pxor %xmm7,%xmm5
pxor %xmm11,%xmm9
pxor %xmm13,%xmm15
movdqu %xmm1,0 + 0(%rdi)
movdqu %xmm5,16 + 0(%rdi)
movdqu %xmm9,32 + 0(%rdi)
movdqu %xmm15,48 + 0(%rdi)
subq $64,%rbx
leaq 64(%rsi),%rsi
leaq 64(%rdi),%rdi
jmp L$open_sse_tail_64_dec_loop
L$open_sse_tail_192:
movdqa L$chacha20_consts(%rip),%xmm0
movdqa 0+48(%rbp),%xmm4
movdqa 0+64(%rbp),%xmm8
movdqa %xmm0,%xmm1
movdqa %xmm4,%xmm5
movdqa %xmm8,%xmm9
movdqa %xmm0,%xmm2
movdqa %xmm4,%xmm6
movdqa %xmm8,%xmm10
movdqa 0+96(%rbp),%xmm14
paddd L$sse_inc(%rip),%xmm14
movdqa %xmm14,%xmm13
paddd L$sse_inc(%rip),%xmm13
movdqa %xmm13,%xmm12
paddd L$sse_inc(%rip),%xmm12
movdqa %xmm12,0+96(%rbp)
movdqa %xmm13,0+112(%rbp)
movdqa %xmm14,0+128(%rbp)
movq %rbx,%rcx
movq $160,%r8
cmpq $160,%rcx
cmovgq %r8,%rcx
andq $-16,%rcx
xorq %r8,%r8
L$open_sse_tail_192_rounds_and_x1hash:
addq 0+0(%rsi,%r8,1),%r10
adcq 8+0(%rsi,%r8,1),%r11
adcq $1,%r12
movq 0+0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r15
adcq %r14,%r9
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
L$open_sse_tail_192_rounds:
addq $16,%r8
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb L$rol16(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $12,%xmm3
psrld $20,%xmm4
pxor %xmm3,%xmm4
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb L$rol8(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $7,%xmm3
psrld $25,%xmm4
pxor %xmm3,%xmm4
.byte 102,15,58,15,228,4
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,12
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb L$rol16(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $12,%xmm3
psrld $20,%xmm5
pxor %xmm3,%xmm5
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb L$rol8(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $7,%xmm3
psrld $25,%xmm5
pxor %xmm3,%xmm5
.byte 102,15,58,15,237,4
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,12
paddd %xmm6,%xmm2
pxor %xmm2,%xmm14
pshufb L$rol16(%rip),%xmm14
paddd %xmm14,%xmm10
pxor %xmm10,%xmm6
movdqa %xmm6,%xmm3
pslld $12,%xmm3
psrld $20,%xmm6
pxor %xmm3,%xmm6
paddd %xmm6,%xmm2
pxor %xmm2,%xmm14
pshufb L$rol8(%rip),%xmm14
paddd %xmm14,%xmm10
pxor %xmm10,%xmm6
movdqa %xmm6,%xmm3
pslld $7,%xmm3
psrld $25,%xmm6
pxor %xmm3,%xmm6
.byte 102,15,58,15,246,4
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,12
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb L$rol16(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $12,%xmm3
psrld $20,%xmm4
pxor %xmm3,%xmm4
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb L$rol8(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $7,%xmm3
psrld $25,%xmm4
pxor %xmm3,%xmm4
.byte 102,15,58,15,228,12
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,4
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb L$rol16(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $12,%xmm3
psrld $20,%xmm5
pxor %xmm3,%xmm5
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb L$rol8(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $7,%xmm3
psrld $25,%xmm5
pxor %xmm3,%xmm5
.byte 102,15,58,15,237,12
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,4
paddd %xmm6,%xmm2
pxor %xmm2,%xmm14
pshufb L$rol16(%rip),%xmm14
paddd %xmm14,%xmm10
pxor %xmm10,%xmm6
movdqa %xmm6,%xmm3
pslld $12,%xmm3
psrld $20,%xmm6
pxor %xmm3,%xmm6
paddd %xmm6,%xmm2
pxor %xmm2,%xmm14
pshufb L$rol8(%rip),%xmm14
paddd %xmm14,%xmm10
pxor %xmm10,%xmm6
movdqa %xmm6,%xmm3
pslld $7,%xmm3
psrld $25,%xmm6
pxor %xmm3,%xmm6
.byte 102,15,58,15,246,12
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,4
cmpq %rcx,%r8
jb L$open_sse_tail_192_rounds_and_x1hash
cmpq $160,%r8
jne L$open_sse_tail_192_rounds
cmpq $176,%rbx
jb L$open_sse_tail_192_finish
addq 0+160(%rsi),%r10
adcq 8+160(%rsi),%r11
adcq $1,%r12
movq 0+0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r15
adcq %r14,%r9
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
cmpq $192,%rbx
jb L$open_sse_tail_192_finish
addq 0+176(%rsi),%r10
adcq 8+176(%rsi),%r11
adcq $1,%r12
movq 0+0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r15
adcq %r14,%r9
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
L$open_sse_tail_192_finish:
paddd L$chacha20_consts(%rip),%xmm2
paddd 0+48(%rbp),%xmm6
paddd 0+64(%rbp),%xmm10
paddd 0+128(%rbp),%xmm14
paddd L$chacha20_consts(%rip),%xmm1
paddd 0+48(%rbp),%xmm5
paddd 0+64(%rbp),%xmm9
paddd 0+112(%rbp),%xmm13
paddd L$chacha20_consts(%rip),%xmm0
paddd 0+48(%rbp),%xmm4
paddd 0+64(%rbp),%xmm8
paddd 0+96(%rbp),%xmm12
movdqu 0 + 0(%rsi),%xmm3
movdqu 16 + 0(%rsi),%xmm7
movdqu 32 + 0(%rsi),%xmm11
movdqu 48 + 0(%rsi),%xmm15
pxor %xmm3,%xmm2
pxor %xmm7,%xmm6
pxor %xmm11,%xmm10
pxor %xmm14,%xmm15
movdqu %xmm2,0 + 0(%rdi)
movdqu %xmm6,16 + 0(%rdi)
movdqu %xmm10,32 + 0(%rdi)
movdqu %xmm15,48 + 0(%rdi)
movdqu 0 + 64(%rsi),%xmm3
movdqu 16 + 64(%rsi),%xmm7
movdqu 32 + 64(%rsi),%xmm11
movdqu 48 + 64(%rsi),%xmm15
pxor %xmm3,%xmm1
pxor %xmm7,%xmm5
pxor %xmm11,%xmm9
pxor %xmm13,%xmm15
movdqu %xmm1,0 + 64(%rdi)
movdqu %xmm5,16 + 64(%rdi)
movdqu %xmm9,32 + 64(%rdi)
movdqu %xmm15,48 + 64(%rdi)
subq $128,%rbx
leaq 128(%rsi),%rsi
leaq 128(%rdi),%rdi
jmp L$open_sse_tail_64_dec_loop
L$open_sse_tail_256:
movdqa L$chacha20_consts(%rip),%xmm0
movdqa 0+48(%rbp),%xmm4
movdqa 0+64(%rbp),%xmm8
movdqa %xmm0,%xmm1
movdqa %xmm4,%xmm5
movdqa %xmm8,%xmm9
movdqa %xmm0,%xmm2
movdqa %xmm4,%xmm6
movdqa %xmm8,%xmm10
movdqa %xmm0,%xmm3
movdqa %xmm4,%xmm7
movdqa %xmm8,%xmm11
movdqa 0+96(%rbp),%xmm15
paddd L$sse_inc(%rip),%xmm15
movdqa %xmm15,%xmm14
paddd L$sse_inc(%rip),%xmm14
movdqa %xmm14,%xmm13
paddd L$sse_inc(%rip),%xmm13
movdqa %xmm13,%xmm12
paddd L$sse_inc(%rip),%xmm12
movdqa %xmm12,0+96(%rbp)
movdqa %xmm13,0+112(%rbp)
movdqa %xmm14,0+128(%rbp)
movdqa %xmm15,0+144(%rbp)
xorq %r8,%r8
L$open_sse_tail_256_rounds_and_x1hash:
addq 0+0(%rsi,%r8,1),%r10
adcq 8+0(%rsi,%r8,1),%r11
adcq $1,%r12
movdqa %xmm11,0+80(%rbp)
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb L$rol16(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm11
pslld $12,%xmm11
psrld $20,%xmm4
pxor %xmm11,%xmm4
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb L$rol8(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm11
pslld $7,%xmm11
psrld $25,%xmm4
pxor %xmm11,%xmm4
.byte 102,15,58,15,228,4
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,12
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb L$rol16(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm11
pslld $12,%xmm11
psrld $20,%xmm5
pxor %xmm11,%xmm5
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb L$rol8(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm11
pslld $7,%xmm11
psrld $25,%xmm5
pxor %xmm11,%xmm5
.byte 102,15,58,15,237,4
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,12
paddd %xmm6,%xmm2
pxor %xmm2,%xmm14
pshufb L$rol16(%rip),%xmm14
paddd %xmm14,%xmm10
pxor %xmm10,%xmm6
movdqa %xmm6,%xmm11
pslld $12,%xmm11
psrld $20,%xmm6
pxor %xmm11,%xmm6
paddd %xmm6,%xmm2
pxor %xmm2,%xmm14
pshufb L$rol8(%rip),%xmm14
paddd %xmm14,%xmm10
pxor %xmm10,%xmm6
movdqa %xmm6,%xmm11
pslld $7,%xmm11
psrld $25,%xmm6
pxor %xmm11,%xmm6
.byte 102,15,58,15,246,4
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,12
movdqa 0+80(%rbp),%xmm11
movq 0+0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movdqa %xmm9,0+80(%rbp)
paddd %xmm7,%xmm3
pxor %xmm3,%xmm15
pshufb L$rol16(%rip),%xmm15
paddd %xmm15,%xmm11
pxor %xmm11,%xmm7
movdqa %xmm7,%xmm9
pslld $12,%xmm9
psrld $20,%xmm7
pxor %xmm9,%xmm7
paddd %xmm7,%xmm3
pxor %xmm3,%xmm15
pshufb L$rol8(%rip),%xmm15
paddd %xmm15,%xmm11
pxor %xmm11,%xmm7
movdqa %xmm7,%xmm9
pslld $7,%xmm9
psrld $25,%xmm7
pxor %xmm9,%xmm7
.byte 102,15,58,15,255,4
.byte 102,69,15,58,15,219,8
.byte 102,69,15,58,15,255,12
movdqa 0+80(%rbp),%xmm9
movq 8+0+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
movdqa %xmm11,0+80(%rbp)
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb L$rol16(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm11
pslld $12,%xmm11
psrld $20,%xmm4
pxor %xmm11,%xmm4
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb L$rol8(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm11
pslld $7,%xmm11
psrld $25,%xmm4
pxor %xmm11,%xmm4
.byte 102,15,58,15,228,12
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,4
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb L$rol16(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm11
pslld $12,%xmm11
psrld $20,%xmm5
pxor %xmm11,%xmm5
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb L$rol8(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm11
pslld $7,%xmm11
psrld $25,%xmm5
pxor %xmm11,%xmm5
.byte 102,15,58,15,237,12
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,4
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
paddd %xmm6,%xmm2
pxor %xmm2,%xmm14
pshufb L$rol16(%rip),%xmm14
paddd %xmm14,%xmm10
pxor %xmm10,%xmm6
movdqa %xmm6,%xmm11
pslld $12,%xmm11
psrld $20,%xmm6
pxor %xmm11,%xmm6
paddd %xmm6,%xmm2
pxor %xmm2,%xmm14
pshufb L$rol8(%rip),%xmm14
paddd %xmm14,%xmm10
pxor %xmm10,%xmm6
movdqa %xmm6,%xmm11
pslld $7,%xmm11
psrld $25,%xmm6
pxor %xmm11,%xmm6
.byte 102,15,58,15,246,12
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,4
movdqa 0+80(%rbp),%xmm11
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r15
adcq %r14,%r9
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
movdqa %xmm9,0+80(%rbp)
paddd %xmm7,%xmm3
pxor %xmm3,%xmm15
pshufb L$rol16(%rip),%xmm15
paddd %xmm15,%xmm11
pxor %xmm11,%xmm7
movdqa %xmm7,%xmm9
pslld $12,%xmm9
psrld $20,%xmm7
pxor %xmm9,%xmm7
paddd %xmm7,%xmm3
pxor %xmm3,%xmm15
pshufb L$rol8(%rip),%xmm15
paddd %xmm15,%xmm11
pxor %xmm11,%xmm7
movdqa %xmm7,%xmm9
pslld $7,%xmm9
psrld $25,%xmm7
pxor %xmm9,%xmm7
.byte 102,15,58,15,255,12
.byte 102,69,15,58,15,219,8
.byte 102,69,15,58,15,255,4
movdqa 0+80(%rbp),%xmm9
addq $16,%r8
cmpq $160,%r8
jb L$open_sse_tail_256_rounds_and_x1hash
movq %rbx,%rcx
andq $-16,%rcx
// Hash-only loop of the 256-byte open tail: absorbs 16 bytes of input at
// (%rsi,%r8) per iteration into the Poly1305 accumulator r10:r11:r12 and
// advances r8 by 16 until it reaches rcx (rbx rounded down to a multiple of
// 16 by the two instructions just before this label). The body runs at
// least once because the bound is tested at the bottom.
L$open_sse_tail_256_hash:
// acc += 16-byte block; the adcq $1 adds the 2^128 pad bit into the top limb.
addq 0+0(%rsi,%r8,1),%r10
adcq 8+0(%rsi,%r8,1),%r11
adcq $1,%r12
// acc *= multiplier held in 0(%rbp) (low qword) and 8(%rbp) (high qword);
// the partial products are gathered in r13, r14, r15, r9.
movq 0+0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
// Reduce modulo 2^130 - 5: keep the low 130 bits in r10:r11:r12 and add the
// bits above 2^130 back in times 5 (computed as 4*c + c).
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r15
adcq %r14,%r9
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
addq $16,%r8
cmpq %rcx,%r8
jb L$open_sse_tail_256_hash
// Add the saved input state to each of the four ChaCha20 blocks: constants,
// the rows saved at 48(%rbp) and 64(%rbp), and the per-block counter row
// saved at 144/128/112/96(%rbp).
paddd L$chacha20_consts(%rip),%xmm3
paddd 0+48(%rbp),%xmm7
paddd 0+64(%rbp),%xmm11
paddd 0+144(%rbp),%xmm15
paddd L$chacha20_consts(%rip),%xmm2
paddd 0+48(%rbp),%xmm6
paddd 0+64(%rbp),%xmm10
paddd 0+128(%rbp),%xmm14
paddd L$chacha20_consts(%rip),%xmm1
paddd 0+48(%rbp),%xmm5
paddd 0+64(%rbp),%xmm9
paddd 0+112(%rbp),%xmm13
paddd L$chacha20_consts(%rip),%xmm0
paddd 0+48(%rbp),%xmm4
paddd 0+64(%rbp),%xmm8
paddd 0+96(%rbp),%xmm12
// Bytes 0..63: XOR input with block xmm3/7/11/15. xmm12 is spilled to
// 80(%rbp) so it can serve as the scratch register.
movdqa %xmm12,0+80(%rbp)
movdqu 0 + 0(%rsi),%xmm12
pxor %xmm3,%xmm12
movdqu %xmm12,0 + 0(%rdi)
movdqu 16 + 0(%rsi),%xmm12
pxor %xmm7,%xmm12
movdqu %xmm12,16 + 0(%rdi)
movdqu 32 + 0(%rsi),%xmm12
pxor %xmm11,%xmm12
movdqu %xmm12,32 + 0(%rdi)
movdqu 48 + 0(%rsi),%xmm12
pxor %xmm15,%xmm12
movdqu %xmm12,48 + 0(%rdi)
// Bytes 64..127: XOR input with block xmm2/6/10/14 (xmm3/7/11/15 are now
// free and hold the input).
movdqu 0 + 64(%rsi),%xmm3
movdqu 16 + 64(%rsi),%xmm7
movdqu 32 + 64(%rsi),%xmm11
movdqu 48 + 64(%rsi),%xmm15
pxor %xmm3,%xmm2
pxor %xmm7,%xmm6
pxor %xmm11,%xmm10
pxor %xmm14,%xmm15
movdqu %xmm2,0 + 64(%rdi)
movdqu %xmm6,16 + 64(%rdi)
movdqu %xmm10,32 + 64(%rdi)
movdqu %xmm15,48 + 64(%rdi)
// Bytes 128..191: XOR input with block xmm1/5/9/13.
movdqu 0 + 128(%rsi),%xmm3
movdqu 16 + 128(%rsi),%xmm7
movdqu 32 + 128(%rsi),%xmm11
movdqu 48 + 128(%rsi),%xmm15
pxor %xmm3,%xmm1
pxor %xmm7,%xmm5
pxor %xmm11,%xmm9
pxor %xmm13,%xmm15
movdqu %xmm1,0 + 128(%rdi)
movdqu %xmm5,16 + 128(%rdi)
movdqu %xmm9,32 + 128(%rdi)
movdqu %xmm15,48 + 128(%rdi)
// Restore xmm12; the fourth block (xmm0/4/8/12) is left in registers for the
// 16-byte-at-a-time loop that follows. Account for the 192 bytes processed.
movdqa 0+80(%rbp),%xmm12
subq $192,%rbx
leaq 192(%rsi),%rsi
leaq 192(%rdi),%rdi
// Decrypts the remaining input 16 bytes at a time with the keystream block
// held in xmm0, xmm4, xmm8, xmm12. While rbx >= 16: XOR 16 input bytes with
// xmm0, store them, advance rsi/rdi, and shift the keystream queue down
// (xmm0 <- xmm4 <- xmm8 <- xmm12). No hashing is done in this loop.
// Leaves with rbx < 16 and the next unused keystream bytes in xmm0.
L$open_sse_tail_64_dec_loop:
cmpq $16,%rbx
jb L$open_sse_tail_16_init
subq $16,%rbx
movdqu (%rsi),%xmm3
pxor %xmm3,%xmm0
movdqu %xmm0,(%rdi)
leaq 16(%rsi),%rsi
leaq 16(%rdi),%rdi
movdqa %xmm4,%xmm0
movdqa %xmm8,%xmm4
movdqa %xmm12,%xmm8
jmp L$open_sse_tail_64_dec_loop
// Final partial block (fewer than 16 bytes) of the open path.
// L$open_sse_tail_16_init moves the pending keystream from xmm0 into xmm1;
// L$open_sse_tail_16 expects the keystream in xmm1 and the byte count in rbx.
L$open_sse_tail_16_init:
movdqa %xmm0,%xmm1
L$open_sse_tail_16:
// Nothing left: go straight to tag finalization.
testq %rbx,%rbx
jz L$open_sse_finalize
// Gather the last rbx input bytes into xmm3, reading from the final byte
// backwards so that byte 0 of the input ends up in lane 0; unused upper
// lanes stay zero, so only the rbx valid bytes are read from memory.
pxor %xmm3,%xmm3
leaq -1(%rsi,%rbx,1),%rsi
movq %rbx,%r8
L$open_sse_tail_16_compose:
pslldq $1,%xmm3
pinsrb $0,(%rsi),%xmm3
subq $1,%rsi
subq $1,%r8
jnz L$open_sse_tail_16_compose
// Hand-encoded movq %xmm3,%r13: low qword of the zero-padded input block.
.byte 102,73,15,126,221
// High qword of the zero-padded input block.
pextrq $1,%xmm3,%r14
// Decrypt: XOR with the keystream (the hash input in r13:r14 was captured
// before this XOR, so the undecrypted bytes are what gets hashed).
pxor %xmm1,%xmm3
// Write exactly rbx output bytes, one at a time.
L$open_sse_tail_16_extract:
pextrb $0,%xmm3,(%rdi)
psrldq $1,%xmm3
addq $1,%rdi
subq $1,%rbx
jne L$open_sse_tail_16_extract
// Absorb the zero-padded block into the Poly1305 accumulator r10:r11:r12,
// with the 2^128 pad bit added into the top limb.
addq %r13,%r10
adcq %r14,%r11
adcq $1,%r12
// acc *= multiplier held in 0(%rbp) / 8(%rbp).
movq 0+0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
// Reduce modulo 2^130 - 5 (bits above 2^130 are added back in times 5).
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r15
adcq %r14,%r9
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
// Finishes the Poly1305 computation, writes the 16-byte tag and returns.
// In:  r10:r11:r12 = accumulator; 0(%rbp)/8(%rbp) = multiplier;
//      16(%rbp) = 128-bit value added after reduction; 32(%rbp) = final
//      16-byte block to absorb.
// NOTE(review): 32(%rbp) presumably holds the two 64-bit lengths (AD and
// ciphertext); the store on the open side is outside this view - confirm.
L$open_sse_finalize:
// Absorb the final block with the 2^128 pad bit.
addq 0+0+32(%rbp),%r10
adcq 8+0+32(%rbp),%r11
adcq $1,%r12
// acc *= multiplier.
movq 0+0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
// Partial reduction modulo 2^130 - 5.
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r15
adcq %r14,%r9
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
// Full reduction without a branch: compute acc - (2^130 - 5), whose limbs
// are -5, -1, 3, and keep the original value (via cmovc) if that borrowed.
movq %r10,%r13
movq %r11,%r14
movq %r12,%r15
subq $-5,%r10
sbbq $-1,%r11
sbbq $3,%r12
cmovcq %r13,%r10
cmovcq %r14,%r11
cmovcq %r15,%r12
// tag = (acc + value at 16(%rbp)) mod 2^128; the carry out is discarded.
addq 0+0+16(%rbp),%r10
adcq 8+0+16(%rbp),%r11
// Release the 288+32 byte frame, recover the pointer that was pushed last,
// and store the 16-byte tag through it.
addq $288 + 0 + 32,%rsp
popq %r9
movq %r10,(%r9)
movq %r11,8(%r9)
// Restore saved registers in reverse push order and return.
popq %r15
popq %r14
popq %r13
popq %r12
popq %rbx
popq %rbp
ret
// Open path that computes three ChaCha20 blocks up front.
// NOTE(review): the label name suggests this handles inputs of at most 128
// bytes; the branch that selects it is outside this view.
// Block 0 (xmm0/4/8/12) uses the counter row loaded from 32(%r9); its first
// two rows become the Poly1305 key material. Blocks 1 (xmm1/5/9/13) and
// 2 (xmm2/6/10/14) use counter+1 and counter+2 and supply the keystream.
// xmm7, xmm11 and xmm15 keep copies of the input rows for the final
// feed-forward addition.
L$open_sse_128:
movdqu L$chacha20_consts(%rip),%xmm0
movdqa %xmm0,%xmm1
movdqa %xmm0,%xmm2
movdqu 0(%r9),%xmm4
movdqa %xmm4,%xmm5
movdqa %xmm4,%xmm6
movdqu 16(%r9),%xmm8
movdqa %xmm8,%xmm9
movdqa %xmm8,%xmm10
movdqu 32(%r9),%xmm12
movdqa %xmm12,%xmm13
paddd L$sse_inc(%rip),%xmm13
movdqa %xmm13,%xmm14
paddd L$sse_inc(%rip),%xmm14
movdqa %xmm4,%xmm7
movdqa %xmm8,%xmm11
movdqa %xmm13,%xmm15
// 10 iterations, each a column round followed by a diagonal round.
movq $10,%r10
L$open_sse_128_rounds:
// Column round, block 0. Rotations by 16 and 8 use pshufb with the
// L$rol16 / L$rol8 tables; rotations by 12 and 7 use shift pairs via xmm3.
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb L$rol16(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $12,%xmm3
psrld $20,%xmm4
pxor %xmm3,%xmm4
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb L$rol8(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $7,%xmm3
psrld $25,%xmm4
pxor %xmm3,%xmm4
// Hand-encoded palignr $4/$8/$12 on xmm4/xmm8/xmm12: rotate the rows so the
// next quarter-rounds act on diagonals.
.byte 102,15,58,15,228,4
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,12
// Column round, block 1.
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb L$rol16(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $12,%xmm3
psrld $20,%xmm5
pxor %xmm3,%xmm5
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb L$rol8(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $7,%xmm3
psrld $25,%xmm5
pxor %xmm3,%xmm5
// palignr $4/$8/$12 on xmm5/xmm9/xmm13.
.byte 102,15,58,15,237,4
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,12
// Column round, block 2.
paddd %xmm6,%xmm2
pxor %xmm2,%xmm14
pshufb L$rol16(%rip),%xmm14
paddd %xmm14,%xmm10
pxor %xmm10,%xmm6
movdqa %xmm6,%xmm3
pslld $12,%xmm3
psrld $20,%xmm6
pxor %xmm3,%xmm6
paddd %xmm6,%xmm2
pxor %xmm2,%xmm14
pshufb L$rol8(%rip),%xmm14
paddd %xmm14,%xmm10
pxor %xmm10,%xmm6
movdqa %xmm6,%xmm3
pslld $7,%xmm3
psrld $25,%xmm6
pxor %xmm3,%xmm6
// palignr $4/$8/$12 on xmm6/xmm10/xmm14.
.byte 102,15,58,15,246,4
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,12
// Diagonal round, block 0.
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb L$rol16(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $12,%xmm3
psrld $20,%xmm4
pxor %xmm3,%xmm4
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb L$rol8(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $7,%xmm3
psrld $25,%xmm4
pxor %xmm3,%xmm4
// palignr $12/$8/$4 on xmm4/xmm8/xmm12: undo the diagonal rotation.
.byte 102,15,58,15,228,12
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,4
// Diagonal round, block 1.
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb L$rol16(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $12,%xmm3
psrld $20,%xmm5
pxor %xmm3,%xmm5
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb L$rol8(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $7,%xmm3
psrld $25,%xmm5
pxor %xmm3,%xmm5
// palignr $12/$8/$4 on xmm5/xmm9/xmm13.
.byte 102,15,58,15,237,12
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,4
// Diagonal round, block 2.
paddd %xmm6,%xmm2
pxor %xmm2,%xmm14
pshufb L$rol16(%rip),%xmm14
paddd %xmm14,%xmm10
pxor %xmm10,%xmm6
movdqa %xmm6,%xmm3
pslld $12,%xmm3
psrld $20,%xmm6
pxor %xmm3,%xmm6
paddd %xmm6,%xmm2
pxor %xmm2,%xmm14
pshufb L$rol8(%rip),%xmm14
paddd %xmm14,%xmm10
pxor %xmm10,%xmm6
movdqa %xmm6,%xmm3
pslld $7,%xmm3
psrld $25,%xmm6
pxor %xmm3,%xmm6
// palignr $12/$8/$4 on xmm6/xmm10/xmm14.
.byte 102,15,58,15,246,12
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,4
decq %r10
jnz L$open_sse_128_rounds
// Feed-forward: add the input state back. For block 0 only rows 0 and 1
// (xmm0, xmm4) are completed, since only those are used below. xmm15 holds
// counter+1 for block 1 and is then bumped to counter+2 for block 2.
paddd L$chacha20_consts(%rip),%xmm0
paddd L$chacha20_consts(%rip),%xmm1
paddd L$chacha20_consts(%rip),%xmm2
paddd %xmm7,%xmm4
paddd %xmm7,%xmm5
paddd %xmm7,%xmm6
paddd %xmm11,%xmm9
paddd %xmm11,%xmm10
paddd %xmm15,%xmm13
paddd L$sse_inc(%rip),%xmm15
paddd %xmm15,%xmm14
// Poly1305 key: row 0 of block 0 masked with L$clamp is stored at 0(%rbp)
// as the multiplier; row 1 is stored at 16(%rbp) and added at finalization.
pand L$clamp(%rip),%xmm0
movdqa %xmm0,0+0(%rbp)
movdqa %xmm4,0+16(%rbp)
// Register-to-itself move; has no effect.
movq %r8,%r8
// Defined elsewhere in this file. NOTE(review): presumably initializes the
// accumulator r10:r11:r12 and absorbs the additional data - confirm there.
call poly_hash_ad_internal
// Per 16 input bytes: absorb them into the hash, then decrypt them with
// xmm1 and shift the 7-register keystream queue down by one.
L$open_sse_128_xor_hash:
cmpq $16,%rbx
jb L$open_sse_tail_16
subq $16,%rbx
// acc += input block with the 2^128 pad bit.
addq 0+0(%rsi),%r10
adcq 8+0(%rsi),%r11
adcq $1,%r12
movdqu 0(%rsi),%xmm3
pxor %xmm3,%xmm1
movdqu %xmm1,0(%rdi)
leaq 16(%rsi),%rsi
leaq 16(%rdi),%rdi
// acc *= multiplier at 0(%rbp) / 8(%rbp).
movq 0+0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
// Reduce modulo 2^130 - 5.
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r15
adcq %r14,%r9
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
// Keystream queue: xmm1 <- xmm5 <- xmm9 <- xmm13 <- xmm2 <- xmm6 <- xmm10
// <- xmm14.
movdqa %xmm5,%xmm1
movdqa %xmm9,%xmm5
movdqa %xmm13,%xmm9
movdqa %xmm2,%xmm13
movdqa %xmm6,%xmm2
movdqa %xmm10,%xmm6
movdqa %xmm14,%xmm10
jmp L$open_sse_128_xor_hash
// Entry point of the SSE4.1 seal routine (the body continues well past this
// setup section).
// Registers read here: rdx = input length, r8 = value stored as the low
// half of the final hash block, r9 = pointer to a data block from which
// offsets 0, 16, 32 (ChaCha20 state rows) and 56 (a 64-bit value added to
// the length) are read.
// NOTE(review): r8 is presumably the additional-data length and 56(%r9) an
// extra-input length - confirm against the C prototype.
.globl _chacha20_poly1305_seal_sse41
.private_extern _chacha20_poly1305_seal_sse41
.p2align 6
_chacha20_poly1305_seal_sse41:
_CET_ENDBR
// Save registers; r9 is pushed last so it is the first value popped later.
pushq %rbp
pushq %rbx
pushq %r12
pushq %r13
pushq %r14
pushq %r15
pushq %r9
// Reserve 288+32 bytes and make rbp a 32-byte aligned base inside them, so
// the movdqa spills relative to rbp are aligned.
subq $288 + 0 + 32,%rsp
leaq 32(%rsp),%rbp
andq $-32,%rbp
// Build the 16-byte block kept at 32(%rbp): r8 in the low qword and
// rdx + 56(%r9) in the high qword. rbx then becomes the working length.
movq 56(%r9),%rbx
addq %rdx,%rbx
movq %r8,0+0+32(%rbp)
movq %rbx,8+0+32(%rbp)
movq %rdx,%rbx
// Lengths of at most 128 bytes take a separate path (outside this view).
cmpq $128,%rbx
jbe L$seal_sse_128
// Four parallel ChaCha20 states: rows 0..2 are identical copies; row 3
// gets four consecutive counters. xmm15 keeps the loaded counter row,
// xmm14/xmm13/xmm12 are +1/+2/+3 (via L$sse_inc).
movdqa L$chacha20_consts(%rip),%xmm0
movdqu 0(%r9),%xmm4
movdqu 16(%r9),%xmm8
movdqu 32(%r9),%xmm12
movdqa %xmm0,%xmm1
movdqa %xmm0,%xmm2
movdqa %xmm0,%xmm3
movdqa %xmm4,%xmm5
movdqa %xmm4,%xmm6
movdqa %xmm4,%xmm7
movdqa %xmm8,%xmm9
movdqa %xmm8,%xmm10
movdqa %xmm8,%xmm11
movdqa %xmm12,%xmm15
paddd L$sse_inc(%rip),%xmm12
movdqa %xmm12,%xmm14
paddd L$sse_inc(%rip),%xmm12
movdqa %xmm12,%xmm13
paddd L$sse_inc(%rip),%xmm12
// Save the input rows for the feed-forward addition: rows 1 and 2 at
// 48/64(%rbp), the four counter rows at 96/112/128/144(%rbp).
movdqa %xmm4,0+48(%rbp)
movdqa %xmm8,0+64(%rbp)
movdqa %xmm12,0+96(%rbp)
movdqa %xmm13,0+112(%rbp)
movdqa %xmm14,0+128(%rbp)
movdqa %xmm15,0+144(%rbp)
// 10 iterations of the double round that follows.
movq $10,%r10
// Initial ChaCha20 computation for seal: r10 iterations of a double round
// over four blocks in parallel (block k lives in xmm(k), xmm(4+k),
// xmm(8+k), xmm(12+k)). All 16 xmm registers hold state, so xmm8 is spilled
// to 80(%rbp) whenever a scratch register or rotation table is needed.
// Afterwards the block with the lowest counter yields the Poly1305 key and
// the next two blocks encrypt the first 128 bytes.
L$seal_sse_init_rounds:
// Column round, first half: a += b; d ^= a; d <<<= 16 (xmm8 = L$rol16).
movdqa %xmm8,0+80(%rbp)
movdqa L$rol16(%rip),%xmm8
paddd %xmm7,%xmm3
paddd %xmm6,%xmm2
paddd %xmm5,%xmm1
paddd %xmm4,%xmm0
pxor %xmm3,%xmm15
pxor %xmm2,%xmm14
pxor %xmm1,%xmm13
pxor %xmm0,%xmm12
// Hand-encoded pshufb %xmm8 into xmm15, xmm14, xmm13, xmm12.
.byte 102,69,15,56,0,248
.byte 102,69,15,56,0,240
.byte 102,69,15,56,0,232
.byte 102,69,15,56,0,224
// c += d; b ^= c; b <<<= 12 (shift pairs with xmm8 as scratch).
movdqa 0+80(%rbp),%xmm8
paddd %xmm15,%xmm11
paddd %xmm14,%xmm10
paddd %xmm13,%xmm9
paddd %xmm12,%xmm8
pxor %xmm11,%xmm7
pxor %xmm10,%xmm6
pxor %xmm9,%xmm5
pxor %xmm8,%xmm4
movdqa %xmm8,0+80(%rbp)
movdqa %xmm7,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm7
pxor %xmm8,%xmm7
movdqa %xmm6,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm6
pxor %xmm8,%xmm6
movdqa %xmm5,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm5
pxor %xmm8,%xmm5
movdqa %xmm4,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm4
pxor %xmm8,%xmm4
// a += b; d ^= a; d <<<= 8 (xmm8 = L$rol8).
movdqa L$rol8(%rip),%xmm8
paddd %xmm7,%xmm3
paddd %xmm6,%xmm2
paddd %xmm5,%xmm1
paddd %xmm4,%xmm0
pxor %xmm3,%xmm15
pxor %xmm2,%xmm14
pxor %xmm1,%xmm13
pxor %xmm0,%xmm12
.byte 102,69,15,56,0,248
.byte 102,69,15,56,0,240
.byte 102,69,15,56,0,232
.byte 102,69,15,56,0,224
// c += d; b ^= c; b <<<= 7.
movdqa 0+80(%rbp),%xmm8
paddd %xmm15,%xmm11
paddd %xmm14,%xmm10
paddd %xmm13,%xmm9
paddd %xmm12,%xmm8
pxor %xmm11,%xmm7
pxor %xmm10,%xmm6
pxor %xmm9,%xmm5
pxor %xmm8,%xmm4
movdqa %xmm8,0+80(%rbp)
movdqa %xmm7,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm7
pxor %xmm8,%xmm7
movdqa %xmm6,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm6
pxor %xmm8,%xmm6
movdqa %xmm5,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm5
pxor %xmm8,%xmm5
movdqa %xmm4,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm4
pxor %xmm8,%xmm4
movdqa 0+80(%rbp),%xmm8
// Hand-encoded palignr $4/$8/$12 on rows b/c/d of each block: rotate rows
// so the second half of the double round works on diagonals.
.byte 102,15,58,15,255,4
.byte 102,69,15,58,15,219,8
.byte 102,69,15,58,15,255,12
.byte 102,15,58,15,246,4
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,12
.byte 102,15,58,15,237,4
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,12
.byte 102,15,58,15,228,4
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,12
// Diagonal round: same sequence as the column round above.
movdqa %xmm8,0+80(%rbp)
movdqa L$rol16(%rip),%xmm8
paddd %xmm7,%xmm3
paddd %xmm6,%xmm2
paddd %xmm5,%xmm1
paddd %xmm4,%xmm0
pxor %xmm3,%xmm15
pxor %xmm2,%xmm14
pxor %xmm1,%xmm13
pxor %xmm0,%xmm12
.byte 102,69,15,56,0,248
.byte 102,69,15,56,0,240
.byte 102,69,15,56,0,232
.byte 102,69,15,56,0,224
movdqa 0+80(%rbp),%xmm8
paddd %xmm15,%xmm11
paddd %xmm14,%xmm10
paddd %xmm13,%xmm9
paddd %xmm12,%xmm8
pxor %xmm11,%xmm7
pxor %xmm10,%xmm6
pxor %xmm9,%xmm5
pxor %xmm8,%xmm4
movdqa %xmm8,0+80(%rbp)
movdqa %xmm7,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm7
pxor %xmm8,%xmm7
movdqa %xmm6,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm6
pxor %xmm8,%xmm6
movdqa %xmm5,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm5
pxor %xmm8,%xmm5
movdqa %xmm4,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm4
pxor %xmm8,%xmm4
movdqa L$rol8(%rip),%xmm8
paddd %xmm7,%xmm3
paddd %xmm6,%xmm2
paddd %xmm5,%xmm1
paddd %xmm4,%xmm0
pxor %xmm3,%xmm15
pxor %xmm2,%xmm14
pxor %xmm1,%xmm13
pxor %xmm0,%xmm12
.byte 102,69,15,56,0,248
.byte 102,69,15,56,0,240
.byte 102,69,15,56,0,232
.byte 102,69,15,56,0,224
movdqa 0+80(%rbp),%xmm8
paddd %xmm15,%xmm11
paddd %xmm14,%xmm10
paddd %xmm13,%xmm9
paddd %xmm12,%xmm8
pxor %xmm11,%xmm7
pxor %xmm10,%xmm6
pxor %xmm9,%xmm5
pxor %xmm8,%xmm4
movdqa %xmm8,0+80(%rbp)
movdqa %xmm7,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm7
pxor %xmm8,%xmm7
movdqa %xmm6,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm6
pxor %xmm8,%xmm6
movdqa %xmm5,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm5
pxor %xmm8,%xmm5
movdqa %xmm4,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm4
pxor %xmm8,%xmm4
movdqa 0+80(%rbp),%xmm8
// palignr $12/$8/$4: rotate the rows back to column order.
.byte 102,15,58,15,255,12
.byte 102,69,15,58,15,219,8
.byte 102,69,15,58,15,255,4
.byte 102,15,58,15,246,12
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,4
.byte 102,15,58,15,237,12
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,4
.byte 102,15,58,15,228,12
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,4
decq %r10
jnz L$seal_sse_init_rounds
// Feed-forward: add the saved input rows and per-block counters.
paddd L$chacha20_consts(%rip),%xmm3
paddd 0+48(%rbp),%xmm7
paddd 0+64(%rbp),%xmm11
paddd 0+144(%rbp),%xmm15
paddd L$chacha20_consts(%rip),%xmm2
paddd 0+48(%rbp),%xmm6
paddd 0+64(%rbp),%xmm10
paddd 0+128(%rbp),%xmm14
paddd L$chacha20_consts(%rip),%xmm1
paddd 0+48(%rbp),%xmm5
paddd 0+64(%rbp),%xmm9
paddd 0+112(%rbp),%xmm13
paddd L$chacha20_consts(%rip),%xmm0
paddd 0+48(%rbp),%xmm4
paddd 0+64(%rbp),%xmm8
paddd 0+96(%rbp),%xmm12
// Poly1305 key from the block that used the counter saved at 144(%rbp):
// row 0 masked with L$clamp becomes the multiplier at 0(%rbp); row 1 is
// stored at 16(%rbp).
pand L$clamp(%rip),%xmm3
movdqa %xmm3,0+0(%rbp)
movdqa %xmm7,0+16(%rbp)
// Register-to-itself move; has no effect.
movq %r8,%r8
// Defined elsewhere in this file. NOTE(review): presumably initializes the
// accumulator r10:r11:r12 and absorbs the additional data - confirm there.
call poly_hash_ad_internal
// Encrypt input bytes 0..63 with block xmm2/6/10/14 (xmm3/7/11/15 are free
// now and carry the input).
movdqu 0 + 0(%rsi),%xmm3
movdqu 16 + 0(%rsi),%xmm7
movdqu 32 + 0(%rsi),%xmm11
movdqu 48 + 0(%rsi),%xmm15
pxor %xmm3,%xmm2
pxor %xmm7,%xmm6
pxor %xmm11,%xmm10
pxor %xmm14,%xmm15
movdqu %xmm2,0 + 0(%rdi)
movdqu %xmm6,16 + 0(%rdi)
movdqu %xmm10,32 + 0(%rdi)
movdqu %xmm15,48 + 0(%rdi)
// Encrypt input bytes 64..127 with block xmm1/5/9/13.
movdqu 0 + 64(%rsi),%xmm3
movdqu 16 + 64(%rsi),%xmm7
movdqu 32 + 64(%rsi),%xmm11
movdqu 48 + 64(%rsi),%xmm15
pxor %xmm3,%xmm1
pxor %xmm7,%xmm5
pxor %xmm11,%xmm9
pxor %xmm13,%xmm15
movdqu %xmm1,0 + 64(%rdi)
movdqu %xmm5,16 + 64(%rdi)
movdqu %xmm9,32 + 64(%rdi)
movdqu %xmm15,48 + 64(%rdi)
// More than 192 bytes in total: the fourth block is used in full at
// L$seal_sse_main_init. Otherwise account for the 128 bytes written
// (rcx = 128) and continue at L$seal_sse_128_tail_hash (outside this view)
// with xmm0/4/8/12 still holding the unused block. rdi is not advanced here.
cmpq $192,%rbx
ja L$seal_sse_main_init
movq $128,%rcx
subq $128,%rbx
leaq 128(%rsi),%rsi
jmp L$seal_sse_128_tail_hash
// Encrypts input bytes 128..191 with the fourth initial block
// (xmm0/4/8/12), then chooses how to process what remains.
L$seal_sse_main_init:
movdqu 0 + 128(%rsi),%xmm3
movdqu 16 + 128(%rsi),%xmm7
movdqu 32 + 128(%rsi),%xmm11
movdqu 48 + 128(%rsi),%xmm15
pxor %xmm3,%xmm0
pxor %xmm7,%xmm4
pxor %xmm11,%xmm8
pxor %xmm12,%xmm15
movdqu %xmm0,0 + 128(%rdi)
movdqu %xmm4,16 + 128(%rdi)
movdqu %xmm8,32 + 128(%rdi)
movdqu %xmm15,48 + 128(%rdi)
// This rcx value is overwritten two instructions below before any use.
movq $192,%rcx
subq $192,%rbx
leaq 192(%rsi),%rsi
// Loop counters consumed by the round loops that follow (rcx and r8 are
// decremented and tested there). rdi still points at the first output
// byte; the round loops read from rdi while hashing and advance it.
movq $2,%rcx
movq $8,%r8
// Dispatch on the remaining length: up to 64, 128 or 192 bytes go to the
// matching tail routine, anything longer falls into the main loop.
cmpq $64,%rbx
jbe L$seal_sse_tail_64
cmpq $128,%rbx
jbe L$seal_sse_tail_128
cmpq $192,%rbx
jbe L$seal_sse_tail_192
// Top of the 256-bytes-per-iteration seal loop: rebuild four ChaCha20 input
// states from the saved rows (constants, 48(%rbp), 64(%rbp)) and assign the
// next four counters, continuing from the last counter saved at 96(%rbp).
L$seal_sse_main_loop:
movdqa L$chacha20_consts(%rip),%xmm0
movdqa 0+48(%rbp),%xmm4
movdqa 0+64(%rbp),%xmm8
movdqa %xmm0,%xmm1
movdqa %xmm4,%xmm5
movdqa %xmm8,%xmm9
movdqa %xmm0,%xmm2
movdqa %xmm4,%xmm6
movdqa %xmm8,%xmm10
movdqa %xmm0,%xmm3
movdqa %xmm4,%xmm7
movdqa %xmm8,%xmm11
// Counters: xmm15 = last+1, xmm14 = last+2, xmm13 = last+3, xmm12 = last+4.
movdqa 0+96(%rbp),%xmm15
paddd L$sse_inc(%rip),%xmm15
movdqa %xmm15,%xmm14
paddd L$sse_inc(%rip),%xmm14
movdqa %xmm14,%xmm13
paddd L$sse_inc(%rip),%xmm13
movdqa %xmm13,%xmm12
paddd L$sse_inc(%rip),%xmm12
// Save the counter rows for the feed-forward; 96(%rbp) now holds the
// highest counter, which the next iteration continues from.
movdqa %xmm12,0+96(%rbp)
movdqa %xmm13,0+112(%rbp)
movdqa %xmm14,0+128(%rbp)
movdqa %xmm15,0+144(%rbp)
// Align the round loop entry to 32 bytes.
.p2align 5
// One ChaCha20 double round over four blocks, interleaved with one
// Poly1305 step that absorbs 16 bytes of already-written output at (%rdi).
// The vector and scalar instruction streams are independent and are mixed
// for scheduling; xmm8 is spilled to 80(%rbp) whenever a scratch register
// or rotation table is needed.
// Loop control: the round is repeated while decq %r8 leaves r8
// non-negative; after that, each pass through the hash-only step near the
// bottom re-enters the round while decq %rcx leaves rcx positive.
L$seal_sse_main_rounds:
// Column round: a += b; d ^= a; d <<<= 16 (xmm8 = L$rol16).
movdqa %xmm8,0+80(%rbp)
movdqa L$rol16(%rip),%xmm8
paddd %xmm7,%xmm3
paddd %xmm6,%xmm2
paddd %xmm5,%xmm1
paddd %xmm4,%xmm0
pxor %xmm3,%xmm15
pxor %xmm2,%xmm14
pxor %xmm1,%xmm13
pxor %xmm0,%xmm12
// Hand-encoded pshufb %xmm8 into xmm15, xmm14, xmm13, xmm12.
.byte 102,69,15,56,0,248
.byte 102,69,15,56,0,240
.byte 102,69,15,56,0,232
.byte 102,69,15,56,0,224
movdqa 0+80(%rbp),%xmm8
paddd %xmm15,%xmm11
paddd %xmm14,%xmm10
paddd %xmm13,%xmm9
paddd %xmm12,%xmm8
pxor %xmm11,%xmm7
// Poly1305: acc (r10:r11:r12) += 16 bytes at (%rdi) with the 2^128 pad bit.
addq 0+0(%rdi),%r10
adcq 8+0(%rdi),%r11
adcq $1,%r12
pxor %xmm10,%xmm6
pxor %xmm9,%xmm5
pxor %xmm8,%xmm4
// b <<<= 12 via shift pairs, xmm8 as scratch.
movdqa %xmm8,0+80(%rbp)
movdqa %xmm7,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm7
pxor %xmm8,%xmm7
movdqa %xmm6,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm6
pxor %xmm8,%xmm6
movdqa %xmm5,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm5
pxor %xmm8,%xmm5
movdqa %xmm4,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm4
pxor %xmm8,%xmm4
// Poly1305: products with the low multiplier qword at 0(%rbp).
movq 0+0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
// a += b; d ^= a; d <<<= 8 (xmm8 = L$rol8).
movdqa L$rol8(%rip),%xmm8
paddd %xmm7,%xmm3
paddd %xmm6,%xmm2
paddd %xmm5,%xmm1
paddd %xmm4,%xmm0
pxor %xmm3,%xmm15
pxor %xmm2,%xmm14
pxor %xmm1,%xmm13
pxor %xmm0,%xmm12
.byte 102,69,15,56,0,248
.byte 102,69,15,56,0,240
.byte 102,69,15,56,0,232
.byte 102,69,15,56,0,224
movdqa 0+80(%rbp),%xmm8
paddd %xmm15,%xmm11
paddd %xmm14,%xmm10
paddd %xmm13,%xmm9
paddd %xmm12,%xmm8
pxor %xmm11,%xmm7
pxor %xmm10,%xmm6
// Poly1305: products with the high multiplier qword at 8(%rbp).
movq 8+0+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
pxor %xmm9,%xmm5
pxor %xmm8,%xmm4
// b <<<= 7.
movdqa %xmm8,0+80(%rbp)
movdqa %xmm7,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm7
pxor %xmm8,%xmm7
movdqa %xmm6,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm6
pxor %xmm8,%xmm6
movdqa %xmm5,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm5
pxor %xmm8,%xmm5
movdqa %xmm4,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm4
pxor %xmm8,%xmm4
movdqa 0+80(%rbp),%xmm8
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
// Hand-encoded palignr $4/$8/$12 on rows b/c/d of each block: switch to
// diagonal order.
.byte 102,15,58,15,255,4
.byte 102,69,15,58,15,219,8
.byte 102,69,15,58,15,255,12
.byte 102,15,58,15,246,4
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,12
.byte 102,15,58,15,237,4
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,12
.byte 102,15,58,15,228,4
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,12
// Diagonal round.
movdqa %xmm8,0+80(%rbp)
movdqa L$rol16(%rip),%xmm8
paddd %xmm7,%xmm3
paddd %xmm6,%xmm2
paddd %xmm5,%xmm1
paddd %xmm4,%xmm0
pxor %xmm3,%xmm15
pxor %xmm2,%xmm14
// Poly1305: reduce modulo 2^130 - 5 (bits above 2^130 are added back in
// times 5), leaving the accumulator in r10:r11:r12.
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r15
adcq %r14,%r9
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
pxor %xmm1,%xmm13
pxor %xmm0,%xmm12
.byte 102,69,15,56,0,248
.byte 102,69,15,56,0,240
.byte 102,69,15,56,0,232
.byte 102,69,15,56,0,224
movdqa 0+80(%rbp),%xmm8
paddd %xmm15,%xmm11
paddd %xmm14,%xmm10
paddd %xmm13,%xmm9
paddd %xmm12,%xmm8
pxor %xmm11,%xmm7
pxor %xmm10,%xmm6
pxor %xmm9,%xmm5
pxor %xmm8,%xmm4
movdqa %xmm8,0+80(%rbp)
movdqa %xmm7,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm7
pxor %xmm8,%xmm7
movdqa %xmm6,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm6
pxor %xmm8,%xmm6
movdqa %xmm5,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm5
pxor %xmm8,%xmm5
movdqa %xmm4,%xmm8
psrld $20,%xmm8
pslld $32-20,%xmm4
pxor %xmm8,%xmm4
movdqa L$rol8(%rip),%xmm8
paddd %xmm7,%xmm3
paddd %xmm6,%xmm2
paddd %xmm5,%xmm1
paddd %xmm4,%xmm0
pxor %xmm3,%xmm15
pxor %xmm2,%xmm14
pxor %xmm1,%xmm13
pxor %xmm0,%xmm12
.byte 102,69,15,56,0,248
.byte 102,69,15,56,0,240
.byte 102,69,15,56,0,232
.byte 102,69,15,56,0,224
movdqa 0+80(%rbp),%xmm8
paddd %xmm15,%xmm11
paddd %xmm14,%xmm10
paddd %xmm13,%xmm9
paddd %xmm12,%xmm8
pxor %xmm11,%xmm7
pxor %xmm10,%xmm6
pxor %xmm9,%xmm5
pxor %xmm8,%xmm4
movdqa %xmm8,0+80(%rbp)
movdqa %xmm7,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm7
pxor %xmm8,%xmm7
movdqa %xmm6,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm6
pxor %xmm8,%xmm6
movdqa %xmm5,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm5
pxor %xmm8,%xmm5
movdqa %xmm4,%xmm8
psrld $25,%xmm8
pslld $32-25,%xmm4
pxor %xmm8,%xmm4
movdqa 0+80(%rbp),%xmm8
// palignr $12/$8/$4: rotate the rows back to column order.
.byte 102,15,58,15,255,12
.byte 102,69,15,58,15,219,8
.byte 102,69,15,58,15,255,4
.byte 102,15,58,15,246,12
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,4
.byte 102,15,58,15,237,12
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,4
.byte 102,15,58,15,228,12
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,4
// Step past the 16 bytes just hashed and repeat while r8 >= 0.
leaq 16(%rdi),%rdi
decq %r8
jge L$seal_sse_main_rounds
// Hash-only step: absorb one more 16-byte block from (%rdi).
addq 0+0(%rdi),%r10
adcq 8+0(%rdi),%r11
adcq $1,%r12
movq 0+0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r15
adcq %r14,%r9
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
leaq 16(%rdi),%rdi
decq %rcx
jg L$seal_sse_main_rounds
// Rounds finished. Feed-forward: add the saved input rows and counters.
paddd L$chacha20_consts(%rip),%xmm3
paddd 0+48(%rbp),%xmm7
paddd 0+64(%rbp),%xmm11
paddd 0+144(%rbp),%xmm15
paddd L$chacha20_consts(%rip),%xmm2
paddd 0+48(%rbp),%xmm6
paddd 0+64(%rbp),%xmm10
paddd 0+128(%rbp),%xmm14
paddd L$chacha20_consts(%rip),%xmm1
paddd 0+48(%rbp),%xmm5
paddd 0+64(%rbp),%xmm9
paddd 0+112(%rbp),%xmm13
paddd L$chacha20_consts(%rip),%xmm0
paddd 0+48(%rbp),%xmm4
paddd 0+64(%rbp),%xmm8
paddd 0+96(%rbp),%xmm12
// Spill xmm14 so it can act as scratch for the first 64 bytes (the store
// is emitted twice in this generated code; the second copy is redundant).
movdqa %xmm14,0+80(%rbp)
movdqa %xmm14,0+80(%rbp)
// Encrypt bytes 0..63 with block xmm3/7/11/15.
movdqu 0 + 0(%rsi),%xmm14
pxor %xmm3,%xmm14
movdqu %xmm14,0 + 0(%rdi)
movdqu 16 + 0(%rsi),%xmm14
pxor %xmm7,%xmm14
movdqu %xmm14,16 + 0(%rdi)
movdqu 32 + 0(%rsi),%xmm14
pxor %xmm11,%xmm14
movdqu %xmm14,32 + 0(%rdi)
movdqu 48 + 0(%rsi),%xmm14
pxor %xmm15,%xmm14
movdqu %xmm14,48 + 0(%rdi)
movdqa 0+80(%rbp),%xmm14
// Encrypt bytes 64..127 with block xmm2/6/10/14.
movdqu 0 + 64(%rsi),%xmm3
movdqu 16 + 64(%rsi),%xmm7
movdqu 32 + 64(%rsi),%xmm11
movdqu 48 + 64(%rsi),%xmm15
pxor %xmm3,%xmm2
pxor %xmm7,%xmm6
pxor %xmm11,%xmm10
pxor %xmm14,%xmm15
movdqu %xmm2,0 + 64(%rdi)
movdqu %xmm6,16 + 64(%rdi)
movdqu %xmm10,32 + 64(%rdi)
movdqu %xmm15,48 + 64(%rdi)
// Encrypt bytes 128..191 with block xmm1/5/9/13.
movdqu 0 + 128(%rsi),%xmm3
movdqu 16 + 128(%rsi),%xmm7
movdqu 32 + 128(%rsi),%xmm11
movdqu 48 + 128(%rsi),%xmm15
pxor %xmm3,%xmm1
pxor %xmm7,%xmm5
pxor %xmm11,%xmm9
pxor %xmm13,%xmm15
movdqu %xmm1,0 + 128(%rdi)
movdqu %xmm5,16 + 128(%rdi)
movdqu %xmm9,32 + 128(%rdi)
movdqu %xmm15,48 + 128(%rdi)
// More than 256 bytes left: the fourth block is used in full at
// L$seal_sse_main_loop_xor. Otherwise account for the 192 bytes written
// (rcx = 192) and continue at L$seal_sse_128_tail_hash (outside this view)
// with the unused block still in xmm0/4/8/12.
cmpq $256,%rbx
ja L$seal_sse_main_loop_xor
movq $192,%rcx
subq $192,%rbx
leaq 192(%rsi),%rsi
jmp L$seal_sse_128_tail_hash
// Completes a 256-byte main-loop iteration: encrypts bytes 192..255 with
// the fourth block (xmm0/4/8/12), then either loops again or dispatches to
// a tail routine according to the remaining length in rbx.
L$seal_sse_main_loop_xor:
movdqu 0 + 192(%rsi),%xmm3
movdqu 16 + 192(%rsi),%xmm7
movdqu 32 + 192(%rsi),%xmm11
movdqu 48 + 192(%rsi),%xmm15
pxor %xmm3,%xmm0
pxor %xmm7,%xmm4
pxor %xmm11,%xmm8
pxor %xmm12,%xmm15
movdqu %xmm0,0 + 192(%rdi)
movdqu %xmm4,16 + 192(%rdi)
movdqu %xmm8,32 + 192(%rdi)
movdqu %xmm15,48 + 192(%rdi)
leaq 256(%rsi),%rsi
subq $256,%rbx
// Loop counters for the next pass through the interleaved round loop.
movq $6,%rcx
movq $4,%r8
// More than 192 bytes left: run another full iteration. This comparison is
// signed (jg), unlike the unsigned ja/jbe used for the other length checks.
cmpq $192,%rbx
jg L$seal_sse_main_loop
// Nothing left to encrypt: continue at L$seal_sse_128_tail_hash (outside
// this view) with rcx = 0.
movq %rbx,%rcx
testq %rbx,%rbx
je L$seal_sse_128_tail_hash
// 1..192 bytes left: rcx = 6 for the tail round loops, then pick the tail
// routine by size; 64 bytes or fewer fall through to L$seal_sse_tail_64.
movq $6,%rcx
cmpq $128,%rbx
ja L$seal_sse_tail_192
cmpq $64,%rbx
ja L$seal_sse_tail_128
// Seal tail for at most 64 remaining bytes: computes one more ChaCha20
// block in xmm0/4/8/12 while continuing to hash already-written output at
// (%rdi). rcx and r8 are loop counters set by the caller of this label.
L$seal_sse_tail_64:
// Input state with the next counter; 96(%rbp) is updated to match.
movdqa L$chacha20_consts(%rip),%xmm0
movdqa 0+48(%rbp),%xmm4
movdqa 0+64(%rbp),%xmm8
movdqa 0+96(%rbp),%xmm12
paddd L$sse_inc(%rip),%xmm12
movdqa %xmm12,0+96(%rbp)
// Entry used while rcx > 0: an extra Poly1305 step before the round, so
// such iterations hash two 16-byte blocks per double round.
L$seal_sse_tail_64_rounds_and_x2hash:
// acc (r10:r11:r12) += 16 bytes at (%rdi) with the 2^128 pad bit.
addq 0+0(%rdi),%r10
adcq 8+0(%rdi),%r11
adcq $1,%r12
// acc *= multiplier at 0(%rbp) / 8(%rbp).
movq 0+0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
// Reduce modulo 2^130 - 5.
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r15
adcq %r14,%r9
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
leaq 16(%rdi),%rdi
// One double round followed by one Poly1305 step.
L$seal_sse_tail_64_rounds_and_x1hash:
// Column round (xmm3 is scratch for the 12- and 7-bit rotations).
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb L$rol16(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $12,%xmm3
psrld $20,%xmm4
pxor %xmm3,%xmm4
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb L$rol8(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $7,%xmm3
psrld $25,%xmm4
pxor %xmm3,%xmm4
// Hand-encoded palignr $4/$8/$12 on xmm4/xmm8/xmm12: diagonal order.
.byte 102,15,58,15,228,4
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,12
// Diagonal round.
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb L$rol16(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $12,%xmm3
psrld $20,%xmm4
pxor %xmm3,%xmm4
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb L$rol8(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $7,%xmm3
psrld $25,%xmm4
pxor %xmm3,%xmm4
// palignr $12/$8/$4: back to column order.
.byte 102,15,58,15,228,12
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,4
// Poly1305 step on the next 16 bytes at (%rdi).
addq 0+0(%rdi),%r10
adcq 8+0(%rdi),%r11
adcq $1,%r12
movq 0+0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r15
adcq %r14,%r9
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
leaq 16(%rdi),%rdi
// While rcx stays positive take the two-hash entry; after that keep
// iterating the one-hash entry while r8 stays non-negative.
decq %rcx
jg L$seal_sse_tail_64_rounds_and_x2hash
decq %r8
jge L$seal_sse_tail_64_rounds_and_x1hash
// Feed-forward, then hand the finished keystream block in xmm0/4/8/12 to
// L$seal_sse_128_tail_xor (outside this view).
paddd L$chacha20_consts(%rip),%xmm0
paddd 0+48(%rbp),%xmm4
paddd 0+64(%rbp),%xmm8
paddd 0+96(%rbp),%xmm12
jmp L$seal_sse_128_tail_xor
// Seal tail for 65..128 remaining bytes: computes two more ChaCha20 blocks
// (block A in xmm1/5/9/13 with counter+1, block B in xmm0/4/8/12 with
// counter+2) while continuing to hash already-written output at (%rdi).
// Block A encrypts the next 64 bytes here; block B is left in registers
// for L$seal_sse_128_tail_hash (outside this view). rcx and r8 are loop
// counters set by the caller of this label.
L$seal_sse_tail_128:
movdqa L$chacha20_consts(%rip),%xmm0
movdqa 0+48(%rbp),%xmm4
movdqa 0+64(%rbp),%xmm8
movdqa %xmm0,%xmm1
movdqa %xmm4,%xmm5
movdqa %xmm8,%xmm9
movdqa 0+96(%rbp),%xmm13
paddd L$sse_inc(%rip),%xmm13
movdqa %xmm13,%xmm12
paddd L$sse_inc(%rip),%xmm12
// Save the counter rows for the feed-forward addition.
movdqa %xmm12,0+96(%rbp)
movdqa %xmm13,0+112(%rbp)
// Entry used while rcx > 0: an extra Poly1305 step before the round.
L$seal_sse_tail_128_rounds_and_x2hash:
// acc (r10:r11:r12) += 16 bytes at (%rdi) with the 2^128 pad bit.
addq 0+0(%rdi),%r10
adcq 8+0(%rdi),%r11
adcq $1,%r12
// acc *= multiplier at 0(%rbp) / 8(%rbp).
movq 0+0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
// Reduce modulo 2^130 - 5.
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r15
adcq %r14,%r9
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
leaq 16(%rdi),%rdi
// One double round on both blocks with one Poly1305 step in the middle.
L$seal_sse_tail_128_rounds_and_x1hash:
// Column round, block B (xmm3 is scratch for the shift-based rotations).
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb L$rol16(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $12,%xmm3
psrld $20,%xmm4
pxor %xmm3,%xmm4
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb L$rol8(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $7,%xmm3
psrld $25,%xmm4
pxor %xmm3,%xmm4
// Hand-encoded palignr $4/$8/$12 on xmm4/xmm8/xmm12: diagonal order.
.byte 102,15,58,15,228,4
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,12
// Column round, block A.
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb L$rol16(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $12,%xmm3
psrld $20,%xmm5
pxor %xmm3,%xmm5
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb L$rol8(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $7,%xmm3
psrld $25,%xmm5
pxor %xmm3,%xmm5
// palignr $4/$8/$12 on xmm5/xmm9/xmm13.
.byte 102,15,58,15,237,4
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,12
// Poly1305 step on the next 16 bytes at (%rdi).
addq 0+0(%rdi),%r10
adcq 8+0(%rdi),%r11
adcq $1,%r12
movq 0+0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r15
adcq %r14,%r9
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
// Diagonal round, block B.
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb L$rol16(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $12,%xmm3
psrld $20,%xmm4
pxor %xmm3,%xmm4
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb L$rol8(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $7,%xmm3
psrld $25,%xmm4
pxor %xmm3,%xmm4
// palignr $12/$8/$4 on xmm4/xmm8/xmm12: back to column order.
.byte 102,15,58,15,228,12
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,4
// Diagonal round, block A.
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb L$rol16(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $12,%xmm3
psrld $20,%xmm5
pxor %xmm3,%xmm5
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb L$rol8(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $7,%xmm3
psrld $25,%xmm5
pxor %xmm3,%xmm5
// palignr $12/$8/$4 on xmm5/xmm9/xmm13.
.byte 102,15,58,15,237,12
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,4
leaq 16(%rdi),%rdi
// While rcx stays positive take the two-hash entry; after that keep
// iterating the one-hash entry while r8 stays non-negative.
decq %rcx
jg L$seal_sse_tail_128_rounds_and_x2hash
decq %r8
jge L$seal_sse_tail_128_rounds_and_x1hash
// Feed-forward for both blocks.
paddd L$chacha20_consts(%rip),%xmm1
paddd 0+48(%rbp),%xmm5
paddd 0+64(%rbp),%xmm9
paddd 0+112(%rbp),%xmm13
paddd L$chacha20_consts(%rip),%xmm0
paddd 0+48(%rbp),%xmm4
paddd 0+64(%rbp),%xmm8
paddd 0+96(%rbp),%xmm12
// Encrypt the next 64 input bytes with block A and write them at rdi.
movdqu 0 + 0(%rsi),%xmm3
movdqu 16 + 0(%rsi),%xmm7
movdqu 32 + 0(%rsi),%xmm11
movdqu 48 + 0(%rsi),%xmm15
pxor %xmm3,%xmm1
pxor %xmm7,%xmm5
pxor %xmm11,%xmm9
pxor %xmm13,%xmm15
movdqu %xmm1,0 + 0(%rdi)
movdqu %xmm5,16 + 0(%rdi)
movdqu %xmm9,32 + 0(%rdi)
movdqu %xmm15,48 + 0(%rdi)
// Account for the 64 bytes just written (rcx = 64) and continue at
// L$seal_sse_128_tail_hash with block B still in xmm0/4/8/12.
movq $64,%rcx
subq $64,%rbx
leaq 64(%rsi),%rsi
jmp L$seal_sse_128_tail_hash
L$seal_sse_tail_192:
movdqa L$chacha20_consts(%rip),%xmm0
movdqa 0+48(%rbp),%xmm4
movdqa 0+64(%rbp),%xmm8
movdqa %xmm0,%xmm1
movdqa %xmm4,%xmm5
movdqa %xmm8,%xmm9
movdqa %xmm0,%xmm2
movdqa %xmm4,%xmm6
movdqa %xmm8,%xmm10
movdqa 0+96(%rbp),%xmm14
paddd L$sse_inc(%rip),%xmm14
movdqa %xmm14,%xmm13
paddd L$sse_inc(%rip),%xmm13
movdqa %xmm13,%xmm12
paddd L$sse_inc(%rip),%xmm12
movdqa %xmm12,0+96(%rbp)
movdqa %xmm13,0+112(%rbp)
movdqa %xmm14,0+128(%rbp)
L$seal_sse_tail_192_rounds_and_x2hash:
addq 0+0(%rdi),%r10
adcq 8+0(%rdi),%r11
adcq $1,%r12
movq 0+0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r15
adcq %r14,%r9
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
leaq 16(%rdi),%rdi
L$seal_sse_tail_192_rounds_and_x1hash:
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb L$rol16(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $12,%xmm3
psrld $20,%xmm4
pxor %xmm3,%xmm4
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb L$rol8(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $7,%xmm3
psrld $25,%xmm4
pxor %xmm3,%xmm4
.byte 102,15,58,15,228,4
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,12
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb L$rol16(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $12,%xmm3
psrld $20,%xmm5
pxor %xmm3,%xmm5
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb L$rol8(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $7,%xmm3
psrld $25,%xmm5
pxor %xmm3,%xmm5
.byte 102,15,58,15,237,4
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,12
paddd %xmm6,%xmm2
pxor %xmm2,%xmm14
pshufb L$rol16(%rip),%xmm14
paddd %xmm14,%xmm10
pxor %xmm10,%xmm6
movdqa %xmm6,%xmm3
pslld $12,%xmm3
psrld $20,%xmm6
pxor %xmm3,%xmm6
paddd %xmm6,%xmm2
pxor %xmm2,%xmm14
pshufb L$rol8(%rip),%xmm14
paddd %xmm14,%xmm10
pxor %xmm10,%xmm6
movdqa %xmm6,%xmm3
pslld $7,%xmm3
psrld $25,%xmm6
pxor %xmm3,%xmm6
.byte 102,15,58,15,246,4
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,12
addq 0+0(%rdi),%r10
adcq 8+0(%rdi),%r11
adcq $1,%r12
movq 0+0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15