| // This file is generated from a similarly-named Perl script in the BoringSSL | 
 | // source tree. Do not edit by hand. | 
 |  | 
 | #include <openssl/asm_base.h> | 
 |  | 
 | #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__) | 
 | .section	.rodata | 
 | .align	64 | 
 | chacha20_poly1305_constants: | 
 | .Lchacha20_consts: | 
 | .byte	'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k' | 
 | .byte	'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k' | 
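// .Lrol8/.Lrol16 are pshufb masks that rotate each 32-bit lane left by 8 and
// 16 bits, the two ChaCha20 rotations that can be done with a byte shuffle.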
 | .Lrol8: | 
 | .byte	3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 | 
 | .byte	3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 | 
 | .Lrol16: | 
 | .byte	2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13 | 
 | .byte	2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13 | 
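// Counter increments: .Lsse_inc bumps the 32-bit block counter by 1 (one block
// per XMM register); .Lavx2_inc bumps both 128-bit lanes by 2 for the AVX2
// path, and .Lavx2_init is an all-zero counter block used by that path.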
 | .Lavx2_init: | 
 | .long	0,0,0,0 | 
 | .Lsse_inc: | 
 | .long	1,0,0,0 | 
 | .Lavx2_inc: | 
 | .long	2,0,0,0,2,0,0,0 | 
 | .Lclamp: | 
 | .quad	0x0FFFFFFC0FFFFFFF, 0x0FFFFFFC0FFFFFFC | 
 | .quad	0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF | 
 | .align	16 | 
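// .Land_masks: the i-th row keeps the low i+1 bytes of a 16-byte block, used
// to mask a final partial block.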
 | .Land_masks: | 
 | .byte	0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 | 
 | .byte	0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 | 
 | .byte	0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 | 
 | .byte	0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 | 
 | .byte	0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 | 
 | .byte	0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 | 
 | .byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 | 
 | .byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 | 
 | .byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00 | 
 | .byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00 | 
 | .byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00 | 
 | .byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00 | 
 | .byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00 | 
 | .byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00 | 
 | .byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00 | 
 | .byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff | 
 | .text	 | 
 |  | 
 | .type	poly_hash_ad_internal,@function | 
 | .align	64 | 
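// poly_hash_ad_internal: absorb the additional data into the Poly1305 state.
// AD pointer in %rcx, AD length in %r8, key "r" at 0(%rbp)/8(%rbp); the
// accumulator lives in %r10:%r11:%r12 and is zeroed here. A 13-byte AD (the
// TLS record header case) takes a one-block fast path.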
 | poly_hash_ad_internal: | 
 | .cfi_startproc	 | 
 | .cfi_def_cfa	rsp, 8 | 
 | 	xorq	%r10,%r10 | 
 | 	xorq	%r11,%r11 | 
 | 	xorq	%r12,%r12 | 
 | 	cmpq	$13,%r8 | 
 | 	jne	.Lhash_ad_loop | 
 | .Lpoly_fast_tls_ad: | 
 |  | 
 | 	movq	(%rcx),%r10 | 
 | 	movq	5(%rcx),%r11 | 
 | 	shrq	$24,%r11 | 
 | 	movq	$1,%r12 | 
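// Poly1305 block: multiply the accumulator %r10:%r11:%r12 by r (from
// 0(%rbp)/8(%rbp)) and reduce mod 2^130-5. The same inlined sequence is
// repeated throughout this file.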
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 |  | 
 | 	ret | 
 | .Lhash_ad_loop: | 
 |  | 
 | 	cmpq	$16,%r8 | 
 | 	jb	.Lhash_ad_tail | 
 | 	addq	0+0(%rcx),%r10 | 
 | 	adcq	8+0(%rcx),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 |  | 
 | 	leaq	16(%rcx),%rcx | 
 | 	subq	$16,%r8 | 
 | 	jmp	.Lhash_ad_loop | 
 | .Lhash_ad_tail: | 
 | 	cmpq	$0,%r8 | 
 | 	je	.Lhash_ad_done | 
 |  | 
 | 	xorq	%r13,%r13 | 
 | 	xorq	%r14,%r14 | 
 | 	xorq	%r15,%r15 | 
 | 	addq	%r8,%rcx | 
 | .Lhash_ad_tail_loop: | 
 | 	shldq	$8,%r13,%r14 | 
 | 	shlq	$8,%r13 | 
 | 	movzbq	-1(%rcx),%r15 | 
 | 	xorq	%r15,%r13 | 
 | 	decq	%rcx | 
 | 	decq	%r8 | 
 | 	jne	.Lhash_ad_tail_loop | 
 |  | 
 | 	addq	%r13,%r10 | 
 | 	adcq	%r14,%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 |  | 
 |  | 
 | .Lhash_ad_done: | 
 | 	ret | 
 | .cfi_endproc	 | 
 | .size	poly_hash_ad_internal, .-poly_hash_ad_internal | 
 |  | 
 | .globl	chacha20_poly1305_open_sse41 | 
 | .hidden chacha20_poly1305_open_sse41 | 
 | .type	chacha20_poly1305_open_sse41,@function | 
 | .align	64 | 
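// chacha20_poly1305_open_sse41(out %rdi, in %rsi, in_len %rdx, ad %rcx,
// ad_len %r8, key/counter/nonce block %r9). The 16-byte Poly1305 tag is
// written back through the saved %r9 on return.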
 | chacha20_poly1305_open_sse41: | 
 | .cfi_startproc	 | 
 | _CET_ENDBR | 
 | 	pushq	%rbp | 
 | .cfi_adjust_cfa_offset	8 | 
 | .cfi_offset	%rbp,-16 | 
 | 	pushq	%rbx | 
 | .cfi_adjust_cfa_offset	8 | 
 | .cfi_offset	%rbx,-24 | 
 | 	pushq	%r12 | 
 | .cfi_adjust_cfa_offset	8 | 
 | .cfi_offset	%r12,-32 | 
 | 	pushq	%r13 | 
 | .cfi_adjust_cfa_offset	8 | 
 | .cfi_offset	%r13,-40 | 
 | 	pushq	%r14 | 
 | .cfi_adjust_cfa_offset	8 | 
 | .cfi_offset	%r14,-48 | 
 | 	pushq	%r15 | 
 | .cfi_adjust_cfa_offset	8 | 
 | .cfi_offset	%r15,-56 | 
 |  | 
 |  | 
 | 	pushq	%r9 | 
 | .cfi_adjust_cfa_offset	8 | 
 | .cfi_offset	%r9,-64 | 
 | 	subq	$288 + 0 + 32,%rsp | 
 | .cfi_adjust_cfa_offset	288 + 32 | 
 |  | 
 | 	leaq	32(%rsp),%rbp | 
 | 	andq	$-32,%rbp | 
 |  | 
 | 	movq	%rdx,%rbx | 
 | 	movq	%r8,0+0+32(%rbp) | 
 | 	movq	%rbx,8+0+32(%rbp) | 
 |  | 
 | 	cmpq	$128,%rbx | 
 | 	jbe	.Lopen_sse_128 | 
 |  | 
 | 	movdqa	.Lchacha20_consts(%rip),%xmm0 | 
 | 	movdqu	0(%r9),%xmm4 | 
 | 	movdqu	16(%r9),%xmm8 | 
 | 	movdqu	32(%r9),%xmm12 | 
 |  | 
 | 	movdqa	%xmm12,%xmm7 | 
 |  | 
 | 	movdqa	%xmm4,0+48(%rbp) | 
 | 	movdqa	%xmm8,0+64(%rbp) | 
 | 	movdqa	%xmm12,0+96(%rbp) | 
 | 	movq	$10,%r10 | 
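// Run 10 ChaCha20 double-rounds on the first block; its clamped output becomes
// the Poly1305 key r||s stored at 0(%rbp)/16(%rbp).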
 | .Lopen_sse_init_rounds: | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm0,%xmm12 | 
 | 	pshufb	.Lrol16(%rip),%xmm12 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm4,%xmm3 | 
 | 	pslld	$12,%xmm3 | 
 | 	psrld	$20,%xmm4 | 
 | 	pxor	%xmm3,%xmm4 | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm0,%xmm12 | 
 | 	pshufb	.Lrol8(%rip),%xmm12 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm4,%xmm3 | 
 | 	pslld	$7,%xmm3 | 
 | 	psrld	$25,%xmm4 | 
 | 	pxor	%xmm3,%xmm4 | 
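// The .byte sequences are hand-encoded palignr instructions rotating the
// second, third and fourth state rows by 4, 8 and 12 bytes (diagonalization);
// the mirrored set below rotates them back.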
 | .byte	102,15,58,15,228,4 | 
 | .byte	102,69,15,58,15,192,8 | 
 | .byte	102,69,15,58,15,228,12 | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm0,%xmm12 | 
 | 	pshufb	.Lrol16(%rip),%xmm12 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm4,%xmm3 | 
 | 	pslld	$12,%xmm3 | 
 | 	psrld	$20,%xmm4 | 
 | 	pxor	%xmm3,%xmm4 | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm0,%xmm12 | 
 | 	pshufb	.Lrol8(%rip),%xmm12 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm4,%xmm3 | 
 | 	pslld	$7,%xmm3 | 
 | 	psrld	$25,%xmm4 | 
 | 	pxor	%xmm3,%xmm4 | 
 | .byte	102,15,58,15,228,12 | 
 | .byte	102,69,15,58,15,192,8 | 
 | .byte	102,69,15,58,15,228,4 | 
 |  | 
 | 	decq	%r10 | 
 | 	jne	.Lopen_sse_init_rounds | 
 |  | 
 | 	paddd	.Lchacha20_consts(%rip),%xmm0 | 
 | 	paddd	0+48(%rbp),%xmm4 | 
 |  | 
 | 	pand	.Lclamp(%rip),%xmm0 | 
 | 	movdqa	%xmm0,0+0(%rbp) | 
 | 	movdqa	%xmm4,0+16(%rbp) | 
 |  | 
 | 	movq	%r8,%r8 | 
 | 	call	poly_hash_ad_internal | 
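// Bulk loop: each iteration generates four 64-byte ChaCha20 blocks and hashes
// 256 bytes of ciphertext with Poly1305, interleaved with the rounds, before
// xoring the keystream into the output.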
 | .Lopen_sse_main_loop: | 
 | 	cmpq	$256,%rbx | 
 | 	jb	.Lopen_sse_tail | 
 |  | 
 | 	movdqa	.Lchacha20_consts(%rip),%xmm0 | 
 | 	movdqa	0+48(%rbp),%xmm4 | 
 | 	movdqa	0+64(%rbp),%xmm8 | 
 | 	movdqa	%xmm0,%xmm1 | 
 | 	movdqa	%xmm4,%xmm5 | 
 | 	movdqa	%xmm8,%xmm9 | 
 | 	movdqa	%xmm0,%xmm2 | 
 | 	movdqa	%xmm4,%xmm6 | 
 | 	movdqa	%xmm8,%xmm10 | 
 | 	movdqa	%xmm0,%xmm3 | 
 | 	movdqa	%xmm4,%xmm7 | 
 | 	movdqa	%xmm8,%xmm11 | 
 | 	movdqa	0+96(%rbp),%xmm15 | 
 | 	paddd	.Lsse_inc(%rip),%xmm15 | 
 | 	movdqa	%xmm15,%xmm14 | 
 | 	paddd	.Lsse_inc(%rip),%xmm14 | 
 | 	movdqa	%xmm14,%xmm13 | 
 | 	paddd	.Lsse_inc(%rip),%xmm13 | 
 | 	movdqa	%xmm13,%xmm12 | 
 | 	paddd	.Lsse_inc(%rip),%xmm12 | 
 | 	movdqa	%xmm12,0+96(%rbp) | 
 | 	movdqa	%xmm13,0+112(%rbp) | 
 | 	movdqa	%xmm14,0+128(%rbp) | 
 | 	movdqa	%xmm15,0+144(%rbp) | 
 |  | 
 |  | 
 |  | 
 | 	movq	$4,%rcx | 
 | 	movq	%rsi,%r8 | 
 | .Lopen_sse_main_loop_rounds: | 
 | 	movdqa	%xmm8,0+80(%rbp) | 
 | 	movdqa	.Lrol16(%rip),%xmm8 | 
 | 	paddd	%xmm7,%xmm3 | 
 | 	paddd	%xmm6,%xmm2 | 
 | 	paddd	%xmm5,%xmm1 | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm3,%xmm15 | 
 | 	pxor	%xmm2,%xmm14 | 
 | 	pxor	%xmm1,%xmm13 | 
 | 	pxor	%xmm0,%xmm12 | 
 | .byte	102,69,15,56,0,248 | 
 | .byte	102,69,15,56,0,240 | 
 | .byte	102,69,15,56,0,232 | 
 | .byte	102,69,15,56,0,224 | 
 | 	movdqa	0+80(%rbp),%xmm8 | 
 | 	paddd	%xmm15,%xmm11 | 
 | 	paddd	%xmm14,%xmm10 | 
 | 	paddd	%xmm13,%xmm9 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm11,%xmm7 | 
 | 	addq	0+0(%r8),%r10 | 
 | 	adcq	8+0(%r8),%r11 | 
 | 	adcq	$1,%r12 | 
 |  | 
 | 	leaq	16(%r8),%r8 | 
 | 	pxor	%xmm10,%xmm6 | 
 | 	pxor	%xmm9,%xmm5 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm8,0+80(%rbp) | 
 | 	movdqa	%xmm7,%xmm8 | 
 | 	psrld	$20,%xmm8 | 
 | 	pslld	$32-20,%xmm7 | 
 | 	pxor	%xmm8,%xmm7 | 
 | 	movdqa	%xmm6,%xmm8 | 
 | 	psrld	$20,%xmm8 | 
 | 	pslld	$32-20,%xmm6 | 
 | 	pxor	%xmm8,%xmm6 | 
 | 	movdqa	%xmm5,%xmm8 | 
 | 	psrld	$20,%xmm8 | 
 | 	pslld	$32-20,%xmm5 | 
 | 	pxor	%xmm8,%xmm5 | 
 | 	movdqa	%xmm4,%xmm8 | 
 | 	psrld	$20,%xmm8 | 
 | 	pslld	$32-20,%xmm4 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movdqa	.Lrol8(%rip),%xmm8 | 
 | 	paddd	%xmm7,%xmm3 | 
 | 	paddd	%xmm6,%xmm2 | 
 | 	paddd	%xmm5,%xmm1 | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm3,%xmm15 | 
 | 	pxor	%xmm2,%xmm14 | 
 | 	pxor	%xmm1,%xmm13 | 
 | 	pxor	%xmm0,%xmm12 | 
 | .byte	102,69,15,56,0,248 | 
 | .byte	102,69,15,56,0,240 | 
 | .byte	102,69,15,56,0,232 | 
 | .byte	102,69,15,56,0,224 | 
 | 	movdqa	0+80(%rbp),%xmm8 | 
 | 	paddd	%xmm15,%xmm11 | 
 | 	paddd	%xmm14,%xmm10 | 
 | 	paddd	%xmm13,%xmm9 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm11,%xmm7 | 
 | 	pxor	%xmm10,%xmm6 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	pxor	%xmm9,%xmm5 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm8,0+80(%rbp) | 
 | 	movdqa	%xmm7,%xmm8 | 
 | 	psrld	$25,%xmm8 | 
 | 	pslld	$32-25,%xmm7 | 
 | 	pxor	%xmm8,%xmm7 | 
 | 	movdqa	%xmm6,%xmm8 | 
 | 	psrld	$25,%xmm8 | 
 | 	pslld	$32-25,%xmm6 | 
 | 	pxor	%xmm8,%xmm6 | 
 | 	movdqa	%xmm5,%xmm8 | 
 | 	psrld	$25,%xmm8 | 
 | 	pslld	$32-25,%xmm5 | 
 | 	pxor	%xmm8,%xmm5 | 
 | 	movdqa	%xmm4,%xmm8 | 
 | 	psrld	$25,%xmm8 | 
 | 	pslld	$32-25,%xmm4 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	0+80(%rbp),%xmm8 | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | .byte	102,15,58,15,255,4 | 
 | .byte	102,69,15,58,15,219,8 | 
 | .byte	102,69,15,58,15,255,12 | 
 | .byte	102,15,58,15,246,4 | 
 | .byte	102,69,15,58,15,210,8 | 
 | .byte	102,69,15,58,15,246,12 | 
 | .byte	102,15,58,15,237,4 | 
 | .byte	102,69,15,58,15,201,8 | 
 | .byte	102,69,15,58,15,237,12 | 
 | .byte	102,15,58,15,228,4 | 
 | .byte	102,69,15,58,15,192,8 | 
 | .byte	102,69,15,58,15,228,12 | 
 | 	movdqa	%xmm8,0+80(%rbp) | 
 | 	movdqa	.Lrol16(%rip),%xmm8 | 
 | 	paddd	%xmm7,%xmm3 | 
 | 	paddd	%xmm6,%xmm2 | 
 | 	paddd	%xmm5,%xmm1 | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm3,%xmm15 | 
 | 	pxor	%xmm2,%xmm14 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 | 	pxor	%xmm1,%xmm13 | 
 | 	pxor	%xmm0,%xmm12 | 
 | .byte	102,69,15,56,0,248 | 
 | .byte	102,69,15,56,0,240 | 
 | .byte	102,69,15,56,0,232 | 
 | .byte	102,69,15,56,0,224 | 
 | 	movdqa	0+80(%rbp),%xmm8 | 
 | 	paddd	%xmm15,%xmm11 | 
 | 	paddd	%xmm14,%xmm10 | 
 | 	paddd	%xmm13,%xmm9 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm11,%xmm7 | 
 | 	pxor	%xmm10,%xmm6 | 
 | 	pxor	%xmm9,%xmm5 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm8,0+80(%rbp) | 
 | 	movdqa	%xmm7,%xmm8 | 
 | 	psrld	$20,%xmm8 | 
 | 	pslld	$32-20,%xmm7 | 
 | 	pxor	%xmm8,%xmm7 | 
 | 	movdqa	%xmm6,%xmm8 | 
 | 	psrld	$20,%xmm8 | 
 | 	pslld	$32-20,%xmm6 | 
 | 	pxor	%xmm8,%xmm6 | 
 | 	movdqa	%xmm5,%xmm8 | 
 | 	psrld	$20,%xmm8 | 
 | 	pslld	$32-20,%xmm5 | 
 | 	pxor	%xmm8,%xmm5 | 
 | 	movdqa	%xmm4,%xmm8 | 
 | 	psrld	$20,%xmm8 | 
 | 	pslld	$32-20,%xmm4 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	.Lrol8(%rip),%xmm8 | 
 | 	paddd	%xmm7,%xmm3 | 
 | 	paddd	%xmm6,%xmm2 | 
 | 	paddd	%xmm5,%xmm1 | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm3,%xmm15 | 
 | 	pxor	%xmm2,%xmm14 | 
 | 	pxor	%xmm1,%xmm13 | 
 | 	pxor	%xmm0,%xmm12 | 
 | .byte	102,69,15,56,0,248 | 
 | .byte	102,69,15,56,0,240 | 
 | .byte	102,69,15,56,0,232 | 
 | .byte	102,69,15,56,0,224 | 
 | 	movdqa	0+80(%rbp),%xmm8 | 
 | 	paddd	%xmm15,%xmm11 | 
 | 	paddd	%xmm14,%xmm10 | 
 | 	paddd	%xmm13,%xmm9 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm11,%xmm7 | 
 | 	pxor	%xmm10,%xmm6 | 
 | 	pxor	%xmm9,%xmm5 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm8,0+80(%rbp) | 
 | 	movdqa	%xmm7,%xmm8 | 
 | 	psrld	$25,%xmm8 | 
 | 	pslld	$32-25,%xmm7 | 
 | 	pxor	%xmm8,%xmm7 | 
 | 	movdqa	%xmm6,%xmm8 | 
 | 	psrld	$25,%xmm8 | 
 | 	pslld	$32-25,%xmm6 | 
 | 	pxor	%xmm8,%xmm6 | 
 | 	movdqa	%xmm5,%xmm8 | 
 | 	psrld	$25,%xmm8 | 
 | 	pslld	$32-25,%xmm5 | 
 | 	pxor	%xmm8,%xmm5 | 
 | 	movdqa	%xmm4,%xmm8 | 
 | 	psrld	$25,%xmm8 | 
 | 	pslld	$32-25,%xmm4 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	0+80(%rbp),%xmm8 | 
 | .byte	102,15,58,15,255,12 | 
 | .byte	102,69,15,58,15,219,8 | 
 | .byte	102,69,15,58,15,255,4 | 
 | .byte	102,15,58,15,246,12 | 
 | .byte	102,69,15,58,15,210,8 | 
 | .byte	102,69,15,58,15,246,4 | 
 | .byte	102,15,58,15,237,12 | 
 | .byte	102,69,15,58,15,201,8 | 
 | .byte	102,69,15,58,15,237,4 | 
 | .byte	102,15,58,15,228,12 | 
 | .byte	102,69,15,58,15,192,8 | 
 | .byte	102,69,15,58,15,228,4 | 
 |  | 
 | 	decq	%rcx | 
 | 	jge	.Lopen_sse_main_loop_rounds | 
 | 	addq	0+0(%r8),%r10 | 
 | 	adcq	8+0(%r8),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 |  | 
 | 	leaq	16(%r8),%r8 | 
 | 	cmpq	$-6,%rcx | 
 | 	jg	.Lopen_sse_main_loop_rounds | 
 | 	paddd	.Lchacha20_consts(%rip),%xmm3 | 
 | 	paddd	0+48(%rbp),%xmm7 | 
 | 	paddd	0+64(%rbp),%xmm11 | 
 | 	paddd	0+144(%rbp),%xmm15 | 
 | 	paddd	.Lchacha20_consts(%rip),%xmm2 | 
 | 	paddd	0+48(%rbp),%xmm6 | 
 | 	paddd	0+64(%rbp),%xmm10 | 
 | 	paddd	0+128(%rbp),%xmm14 | 
 | 	paddd	.Lchacha20_consts(%rip),%xmm1 | 
 | 	paddd	0+48(%rbp),%xmm5 | 
 | 	paddd	0+64(%rbp),%xmm9 | 
 | 	paddd	0+112(%rbp),%xmm13 | 
 | 	paddd	.Lchacha20_consts(%rip),%xmm0 | 
 | 	paddd	0+48(%rbp),%xmm4 | 
 | 	paddd	0+64(%rbp),%xmm8 | 
 | 	paddd	0+96(%rbp),%xmm12 | 
 | 	movdqa	%xmm12,0+80(%rbp) | 
 | 	movdqu	0 + 0(%rsi),%xmm12 | 
 | 	pxor	%xmm3,%xmm12 | 
 | 	movdqu	%xmm12,0 + 0(%rdi) | 
 | 	movdqu	16 + 0(%rsi),%xmm12 | 
 | 	pxor	%xmm7,%xmm12 | 
 | 	movdqu	%xmm12,16 + 0(%rdi) | 
 | 	movdqu	32 + 0(%rsi),%xmm12 | 
 | 	pxor	%xmm11,%xmm12 | 
 | 	movdqu	%xmm12,32 + 0(%rdi) | 
 | 	movdqu	48 + 0(%rsi),%xmm12 | 
 | 	pxor	%xmm15,%xmm12 | 
 | 	movdqu	%xmm12,48 + 0(%rdi) | 
 | 	movdqu	0 + 64(%rsi),%xmm3 | 
 | 	movdqu	16 + 64(%rsi),%xmm7 | 
 | 	movdqu	32 + 64(%rsi),%xmm11 | 
 | 	movdqu	48 + 64(%rsi),%xmm15 | 
 | 	pxor	%xmm3,%xmm2 | 
 | 	pxor	%xmm7,%xmm6 | 
 | 	pxor	%xmm11,%xmm10 | 
 | 	pxor	%xmm14,%xmm15 | 
 | 	movdqu	%xmm2,0 + 64(%rdi) | 
 | 	movdqu	%xmm6,16 + 64(%rdi) | 
 | 	movdqu	%xmm10,32 + 64(%rdi) | 
 | 	movdqu	%xmm15,48 + 64(%rdi) | 
 | 	movdqu	0 + 128(%rsi),%xmm3 | 
 | 	movdqu	16 + 128(%rsi),%xmm7 | 
 | 	movdqu	32 + 128(%rsi),%xmm11 | 
 | 	movdqu	48 + 128(%rsi),%xmm15 | 
 | 	pxor	%xmm3,%xmm1 | 
 | 	pxor	%xmm7,%xmm5 | 
 | 	pxor	%xmm11,%xmm9 | 
 | 	pxor	%xmm13,%xmm15 | 
 | 	movdqu	%xmm1,0 + 128(%rdi) | 
 | 	movdqu	%xmm5,16 + 128(%rdi) | 
 | 	movdqu	%xmm9,32 + 128(%rdi) | 
 | 	movdqu	%xmm15,48 + 128(%rdi) | 
 | 	movdqu	0 + 192(%rsi),%xmm3 | 
 | 	movdqu	16 + 192(%rsi),%xmm7 | 
 | 	movdqu	32 + 192(%rsi),%xmm11 | 
 | 	movdqu	48 + 192(%rsi),%xmm15 | 
 | 	pxor	%xmm3,%xmm0 | 
 | 	pxor	%xmm7,%xmm4 | 
 | 	pxor	%xmm11,%xmm8 | 
 | 	pxor	0+80(%rbp),%xmm15 | 
 | 	movdqu	%xmm0,0 + 192(%rdi) | 
 | 	movdqu	%xmm4,16 + 192(%rdi) | 
 | 	movdqu	%xmm8,32 + 192(%rdi) | 
 | 	movdqu	%xmm15,48 + 192(%rdi) | 
 |  | 
 | 	leaq	256(%rsi),%rsi | 
 | 	leaq	256(%rdi),%rdi | 
 | 	subq	$256,%rbx | 
 | 	jmp	.Lopen_sse_main_loop | 
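// Fewer than 256 bytes remain: pick a 1-, 2-, 3- or 4-block tail depending on
// the remaining length.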
 | .Lopen_sse_tail: | 
 |  | 
 | 	testq	%rbx,%rbx | 
 | 	jz	.Lopen_sse_finalize | 
 | 	cmpq	$192,%rbx | 
 | 	ja	.Lopen_sse_tail_256 | 
 | 	cmpq	$128,%rbx | 
 | 	ja	.Lopen_sse_tail_192 | 
 | 	cmpq	$64,%rbx | 
 | 	ja	.Lopen_sse_tail_128 | 
 | 	movdqa	.Lchacha20_consts(%rip),%xmm0 | 
 | 	movdqa	0+48(%rbp),%xmm4 | 
 | 	movdqa	0+64(%rbp),%xmm8 | 
 | 	movdqa	0+96(%rbp),%xmm12 | 
 | 	paddd	.Lsse_inc(%rip),%xmm12 | 
 | 	movdqa	%xmm12,0+96(%rbp) | 
 |  | 
 | 	xorq	%r8,%r8 | 
 | 	movq	%rbx,%rcx | 
 | 	cmpq	$16,%rcx | 
 | 	jb	.Lopen_sse_tail_64_rounds | 
 | .Lopen_sse_tail_64_rounds_and_x1hash: | 
 | 	addq	0+0(%rsi,%r8,1),%r10 | 
 | 	adcq	8+0(%rsi,%r8,1),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 |  | 
 | 	subq	$16,%rcx | 
 | .Lopen_sse_tail_64_rounds: | 
 | 	addq	$16,%r8 | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm0,%xmm12 | 
 | 	pshufb	.Lrol16(%rip),%xmm12 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm4,%xmm3 | 
 | 	pslld	$12,%xmm3 | 
 | 	psrld	$20,%xmm4 | 
 | 	pxor	%xmm3,%xmm4 | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm0,%xmm12 | 
 | 	pshufb	.Lrol8(%rip),%xmm12 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm4,%xmm3 | 
 | 	pslld	$7,%xmm3 | 
 | 	psrld	$25,%xmm4 | 
 | 	pxor	%xmm3,%xmm4 | 
 | .byte	102,15,58,15,228,4 | 
 | .byte	102,69,15,58,15,192,8 | 
 | .byte	102,69,15,58,15,228,12 | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm0,%xmm12 | 
 | 	pshufb	.Lrol16(%rip),%xmm12 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm4,%xmm3 | 
 | 	pslld	$12,%xmm3 | 
 | 	psrld	$20,%xmm4 | 
 | 	pxor	%xmm3,%xmm4 | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm0,%xmm12 | 
 | 	pshufb	.Lrol8(%rip),%xmm12 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm4,%xmm3 | 
 | 	pslld	$7,%xmm3 | 
 | 	psrld	$25,%xmm4 | 
 | 	pxor	%xmm3,%xmm4 | 
 | .byte	102,15,58,15,228,12 | 
 | .byte	102,69,15,58,15,192,8 | 
 | .byte	102,69,15,58,15,228,4 | 
 |  | 
 | 	cmpq	$16,%rcx | 
 | 	jae	.Lopen_sse_tail_64_rounds_and_x1hash | 
 | 	cmpq	$160,%r8 | 
 | 	jne	.Lopen_sse_tail_64_rounds | 
 | 	paddd	.Lchacha20_consts(%rip),%xmm0 | 
 | 	paddd	0+48(%rbp),%xmm4 | 
 | 	paddd	0+64(%rbp),%xmm8 | 
 | 	paddd	0+96(%rbp),%xmm12 | 
 |  | 
 | 	jmp	.Lopen_sse_tail_64_dec_loop | 
 |  | 
 | .Lopen_sse_tail_128: | 
 | 	movdqa	.Lchacha20_consts(%rip),%xmm0 | 
 | 	movdqa	0+48(%rbp),%xmm4 | 
 | 	movdqa	0+64(%rbp),%xmm8 | 
 | 	movdqa	%xmm0,%xmm1 | 
 | 	movdqa	%xmm4,%xmm5 | 
 | 	movdqa	%xmm8,%xmm9 | 
 | 	movdqa	0+96(%rbp),%xmm13 | 
 | 	paddd	.Lsse_inc(%rip),%xmm13 | 
 | 	movdqa	%xmm13,%xmm12 | 
 | 	paddd	.Lsse_inc(%rip),%xmm12 | 
 | 	movdqa	%xmm12,0+96(%rbp) | 
 | 	movdqa	%xmm13,0+112(%rbp) | 
 |  | 
 | 	movq	%rbx,%rcx | 
 | 	andq	$-16,%rcx | 
 | 	xorq	%r8,%r8 | 
 | .Lopen_sse_tail_128_rounds_and_x1hash: | 
 | 	addq	0+0(%rsi,%r8,1),%r10 | 
 | 	adcq	8+0(%rsi,%r8,1),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 |  | 
 | .Lopen_sse_tail_128_rounds: | 
 | 	addq	$16,%r8 | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm0,%xmm12 | 
 | 	pshufb	.Lrol16(%rip),%xmm12 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm4,%xmm3 | 
 | 	pslld	$12,%xmm3 | 
 | 	psrld	$20,%xmm4 | 
 | 	pxor	%xmm3,%xmm4 | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm0,%xmm12 | 
 | 	pshufb	.Lrol8(%rip),%xmm12 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm4,%xmm3 | 
 | 	pslld	$7,%xmm3 | 
 | 	psrld	$25,%xmm4 | 
 | 	pxor	%xmm3,%xmm4 | 
 | .byte	102,15,58,15,228,4 | 
 | .byte	102,69,15,58,15,192,8 | 
 | .byte	102,69,15,58,15,228,12 | 
 | 	paddd	%xmm5,%xmm1 | 
 | 	pxor	%xmm1,%xmm13 | 
 | 	pshufb	.Lrol16(%rip),%xmm13 | 
 | 	paddd	%xmm13,%xmm9 | 
 | 	pxor	%xmm9,%xmm5 | 
 | 	movdqa	%xmm5,%xmm3 | 
 | 	pslld	$12,%xmm3 | 
 | 	psrld	$20,%xmm5 | 
 | 	pxor	%xmm3,%xmm5 | 
 | 	paddd	%xmm5,%xmm1 | 
 | 	pxor	%xmm1,%xmm13 | 
 | 	pshufb	.Lrol8(%rip),%xmm13 | 
 | 	paddd	%xmm13,%xmm9 | 
 | 	pxor	%xmm9,%xmm5 | 
 | 	movdqa	%xmm5,%xmm3 | 
 | 	pslld	$7,%xmm3 | 
 | 	psrld	$25,%xmm5 | 
 | 	pxor	%xmm3,%xmm5 | 
 | .byte	102,15,58,15,237,4 | 
 | .byte	102,69,15,58,15,201,8 | 
 | .byte	102,69,15,58,15,237,12 | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm0,%xmm12 | 
 | 	pshufb	.Lrol16(%rip),%xmm12 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm4,%xmm3 | 
 | 	pslld	$12,%xmm3 | 
 | 	psrld	$20,%xmm4 | 
 | 	pxor	%xmm3,%xmm4 | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm0,%xmm12 | 
 | 	pshufb	.Lrol8(%rip),%xmm12 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm4,%xmm3 | 
 | 	pslld	$7,%xmm3 | 
 | 	psrld	$25,%xmm4 | 
 | 	pxor	%xmm3,%xmm4 | 
 | .byte	102,15,58,15,228,12 | 
 | .byte	102,69,15,58,15,192,8 | 
 | .byte	102,69,15,58,15,228,4 | 
 | 	paddd	%xmm5,%xmm1 | 
 | 	pxor	%xmm1,%xmm13 | 
 | 	pshufb	.Lrol16(%rip),%xmm13 | 
 | 	paddd	%xmm13,%xmm9 | 
 | 	pxor	%xmm9,%xmm5 | 
 | 	movdqa	%xmm5,%xmm3 | 
 | 	pslld	$12,%xmm3 | 
 | 	psrld	$20,%xmm5 | 
 | 	pxor	%xmm3,%xmm5 | 
 | 	paddd	%xmm5,%xmm1 | 
 | 	pxor	%xmm1,%xmm13 | 
 | 	pshufb	.Lrol8(%rip),%xmm13 | 
 | 	paddd	%xmm13,%xmm9 | 
 | 	pxor	%xmm9,%xmm5 | 
 | 	movdqa	%xmm5,%xmm3 | 
 | 	pslld	$7,%xmm3 | 
 | 	psrld	$25,%xmm5 | 
 | 	pxor	%xmm3,%xmm5 | 
 | .byte	102,15,58,15,237,12 | 
 | .byte	102,69,15,58,15,201,8 | 
 | .byte	102,69,15,58,15,237,4 | 
 |  | 
 | 	cmpq	%rcx,%r8 | 
 | 	jb	.Lopen_sse_tail_128_rounds_and_x1hash | 
 | 	cmpq	$160,%r8 | 
 | 	jne	.Lopen_sse_tail_128_rounds | 
 | 	paddd	.Lchacha20_consts(%rip),%xmm1 | 
 | 	paddd	0+48(%rbp),%xmm5 | 
 | 	paddd	0+64(%rbp),%xmm9 | 
 | 	paddd	0+112(%rbp),%xmm13 | 
 | 	paddd	.Lchacha20_consts(%rip),%xmm0 | 
 | 	paddd	0+48(%rbp),%xmm4 | 
 | 	paddd	0+64(%rbp),%xmm8 | 
 | 	paddd	0+96(%rbp),%xmm12 | 
 | 	movdqu	0 + 0(%rsi),%xmm3 | 
 | 	movdqu	16 + 0(%rsi),%xmm7 | 
 | 	movdqu	32 + 0(%rsi),%xmm11 | 
 | 	movdqu	48 + 0(%rsi),%xmm15 | 
 | 	pxor	%xmm3,%xmm1 | 
 | 	pxor	%xmm7,%xmm5 | 
 | 	pxor	%xmm11,%xmm9 | 
 | 	pxor	%xmm13,%xmm15 | 
 | 	movdqu	%xmm1,0 + 0(%rdi) | 
 | 	movdqu	%xmm5,16 + 0(%rdi) | 
 | 	movdqu	%xmm9,32 + 0(%rdi) | 
 | 	movdqu	%xmm15,48 + 0(%rdi) | 
 |  | 
 | 	subq	$64,%rbx | 
 | 	leaq	64(%rsi),%rsi | 
 | 	leaq	64(%rdi),%rdi | 
 | 	jmp	.Lopen_sse_tail_64_dec_loop | 
 |  | 
 | .Lopen_sse_tail_192: | 
 | 	movdqa	.Lchacha20_consts(%rip),%xmm0 | 
 | 	movdqa	0+48(%rbp),%xmm4 | 
 | 	movdqa	0+64(%rbp),%xmm8 | 
 | 	movdqa	%xmm0,%xmm1 | 
 | 	movdqa	%xmm4,%xmm5 | 
 | 	movdqa	%xmm8,%xmm9 | 
 | 	movdqa	%xmm0,%xmm2 | 
 | 	movdqa	%xmm4,%xmm6 | 
 | 	movdqa	%xmm8,%xmm10 | 
 | 	movdqa	0+96(%rbp),%xmm14 | 
 | 	paddd	.Lsse_inc(%rip),%xmm14 | 
 | 	movdqa	%xmm14,%xmm13 | 
 | 	paddd	.Lsse_inc(%rip),%xmm13 | 
 | 	movdqa	%xmm13,%xmm12 | 
 | 	paddd	.Lsse_inc(%rip),%xmm12 | 
 | 	movdqa	%xmm12,0+96(%rbp) | 
 | 	movdqa	%xmm13,0+112(%rbp) | 
 | 	movdqa	%xmm14,0+128(%rbp) | 
 |  | 
 | 	movq	%rbx,%rcx | 
 | 	movq	$160,%r8 | 
 | 	cmpq	$160,%rcx | 
 | 	cmovgq	%r8,%rcx | 
 | 	andq	$-16,%rcx | 
 | 	xorq	%r8,%r8 | 
 | .Lopen_sse_tail_192_rounds_and_x1hash: | 
 | 	addq	0+0(%rsi,%r8,1),%r10 | 
 | 	adcq	8+0(%rsi,%r8,1),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 |  | 
 | .Lopen_sse_tail_192_rounds: | 
 | 	addq	$16,%r8 | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm0,%xmm12 | 
 | 	pshufb	.Lrol16(%rip),%xmm12 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm4,%xmm3 | 
 | 	pslld	$12,%xmm3 | 
 | 	psrld	$20,%xmm4 | 
 | 	pxor	%xmm3,%xmm4 | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm0,%xmm12 | 
 | 	pshufb	.Lrol8(%rip),%xmm12 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm4,%xmm3 | 
 | 	pslld	$7,%xmm3 | 
 | 	psrld	$25,%xmm4 | 
 | 	pxor	%xmm3,%xmm4 | 
 | .byte	102,15,58,15,228,4 | 
 | .byte	102,69,15,58,15,192,8 | 
 | .byte	102,69,15,58,15,228,12 | 
 | 	paddd	%xmm5,%xmm1 | 
 | 	pxor	%xmm1,%xmm13 | 
 | 	pshufb	.Lrol16(%rip),%xmm13 | 
 | 	paddd	%xmm13,%xmm9 | 
 | 	pxor	%xmm9,%xmm5 | 
 | 	movdqa	%xmm5,%xmm3 | 
 | 	pslld	$12,%xmm3 | 
 | 	psrld	$20,%xmm5 | 
 | 	pxor	%xmm3,%xmm5 | 
 | 	paddd	%xmm5,%xmm1 | 
 | 	pxor	%xmm1,%xmm13 | 
 | 	pshufb	.Lrol8(%rip),%xmm13 | 
 | 	paddd	%xmm13,%xmm9 | 
 | 	pxor	%xmm9,%xmm5 | 
 | 	movdqa	%xmm5,%xmm3 | 
 | 	pslld	$7,%xmm3 | 
 | 	psrld	$25,%xmm5 | 
 | 	pxor	%xmm3,%xmm5 | 
 | .byte	102,15,58,15,237,4 | 
 | .byte	102,69,15,58,15,201,8 | 
 | .byte	102,69,15,58,15,237,12 | 
 | 	paddd	%xmm6,%xmm2 | 
 | 	pxor	%xmm2,%xmm14 | 
 | 	pshufb	.Lrol16(%rip),%xmm14 | 
 | 	paddd	%xmm14,%xmm10 | 
 | 	pxor	%xmm10,%xmm6 | 
 | 	movdqa	%xmm6,%xmm3 | 
 | 	pslld	$12,%xmm3 | 
 | 	psrld	$20,%xmm6 | 
 | 	pxor	%xmm3,%xmm6 | 
 | 	paddd	%xmm6,%xmm2 | 
 | 	pxor	%xmm2,%xmm14 | 
 | 	pshufb	.Lrol8(%rip),%xmm14 | 
 | 	paddd	%xmm14,%xmm10 | 
 | 	pxor	%xmm10,%xmm6 | 
 | 	movdqa	%xmm6,%xmm3 | 
 | 	pslld	$7,%xmm3 | 
 | 	psrld	$25,%xmm6 | 
 | 	pxor	%xmm3,%xmm6 | 
 | .byte	102,15,58,15,246,4 | 
 | .byte	102,69,15,58,15,210,8 | 
 | .byte	102,69,15,58,15,246,12 | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm0,%xmm12 | 
 | 	pshufb	.Lrol16(%rip),%xmm12 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm4,%xmm3 | 
 | 	pslld	$12,%xmm3 | 
 | 	psrld	$20,%xmm4 | 
 | 	pxor	%xmm3,%xmm4 | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm0,%xmm12 | 
 | 	pshufb	.Lrol8(%rip),%xmm12 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm4,%xmm3 | 
 | 	pslld	$7,%xmm3 | 
 | 	psrld	$25,%xmm4 | 
 | 	pxor	%xmm3,%xmm4 | 
 | .byte	102,15,58,15,228,12 | 
 | .byte	102,69,15,58,15,192,8 | 
 | .byte	102,69,15,58,15,228,4 | 
 | 	paddd	%xmm5,%xmm1 | 
 | 	pxor	%xmm1,%xmm13 | 
 | 	pshufb	.Lrol16(%rip),%xmm13 | 
 | 	paddd	%xmm13,%xmm9 | 
 | 	pxor	%xmm9,%xmm5 | 
 | 	movdqa	%xmm5,%xmm3 | 
 | 	pslld	$12,%xmm3 | 
 | 	psrld	$20,%xmm5 | 
 | 	pxor	%xmm3,%xmm5 | 
 | 	paddd	%xmm5,%xmm1 | 
 | 	pxor	%xmm1,%xmm13 | 
 | 	pshufb	.Lrol8(%rip),%xmm13 | 
 | 	paddd	%xmm13,%xmm9 | 
 | 	pxor	%xmm9,%xmm5 | 
 | 	movdqa	%xmm5,%xmm3 | 
 | 	pslld	$7,%xmm3 | 
 | 	psrld	$25,%xmm5 | 
 | 	pxor	%xmm3,%xmm5 | 
 | .byte	102,15,58,15,237,12 | 
 | .byte	102,69,15,58,15,201,8 | 
 | .byte	102,69,15,58,15,237,4 | 
 | 	paddd	%xmm6,%xmm2 | 
 | 	pxor	%xmm2,%xmm14 | 
 | 	pshufb	.Lrol16(%rip),%xmm14 | 
 | 	paddd	%xmm14,%xmm10 | 
 | 	pxor	%xmm10,%xmm6 | 
 | 	movdqa	%xmm6,%xmm3 | 
 | 	pslld	$12,%xmm3 | 
 | 	psrld	$20,%xmm6 | 
 | 	pxor	%xmm3,%xmm6 | 
 | 	paddd	%xmm6,%xmm2 | 
 | 	pxor	%xmm2,%xmm14 | 
 | 	pshufb	.Lrol8(%rip),%xmm14 | 
 | 	paddd	%xmm14,%xmm10 | 
 | 	pxor	%xmm10,%xmm6 | 
 | 	movdqa	%xmm6,%xmm3 | 
 | 	pslld	$7,%xmm3 | 
 | 	psrld	$25,%xmm6 | 
 | 	pxor	%xmm3,%xmm6 | 
 | .byte	102,15,58,15,246,12 | 
 | .byte	102,69,15,58,15,210,8 | 
 | .byte	102,69,15,58,15,246,4 | 
 |  | 
 | 	cmpq	%rcx,%r8 | 
 | 	jb	.Lopen_sse_tail_192_rounds_and_x1hash | 
 | 	cmpq	$160,%r8 | 
 | 	jne	.Lopen_sse_tail_192_rounds | 
 | 	cmpq	$176,%rbx | 
 | 	jb	.Lopen_sse_tail_192_finish | 
 | 	addq	0+160(%rsi),%r10 | 
 | 	adcq	8+160(%rsi),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 |  | 
 | 	cmpq	$192,%rbx | 
 | 	jb	.Lopen_sse_tail_192_finish | 
 | 	addq	0+176(%rsi),%r10 | 
 | 	adcq	8+176(%rsi),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 |  | 
 | .Lopen_sse_tail_192_finish: | 
 | 	paddd	.Lchacha20_consts(%rip),%xmm2 | 
 | 	paddd	0+48(%rbp),%xmm6 | 
 | 	paddd	0+64(%rbp),%xmm10 | 
 | 	paddd	0+128(%rbp),%xmm14 | 
 | 	paddd	.Lchacha20_consts(%rip),%xmm1 | 
 | 	paddd	0+48(%rbp),%xmm5 | 
 | 	paddd	0+64(%rbp),%xmm9 | 
 | 	paddd	0+112(%rbp),%xmm13 | 
 | 	paddd	.Lchacha20_consts(%rip),%xmm0 | 
 | 	paddd	0+48(%rbp),%xmm4 | 
 | 	paddd	0+64(%rbp),%xmm8 | 
 | 	paddd	0+96(%rbp),%xmm12 | 
 | 	movdqu	0 + 0(%rsi),%xmm3 | 
 | 	movdqu	16 + 0(%rsi),%xmm7 | 
 | 	movdqu	32 + 0(%rsi),%xmm11 | 
 | 	movdqu	48 + 0(%rsi),%xmm15 | 
 | 	pxor	%xmm3,%xmm2 | 
 | 	pxor	%xmm7,%xmm6 | 
 | 	pxor	%xmm11,%xmm10 | 
 | 	pxor	%xmm14,%xmm15 | 
 | 	movdqu	%xmm2,0 + 0(%rdi) | 
 | 	movdqu	%xmm6,16 + 0(%rdi) | 
 | 	movdqu	%xmm10,32 + 0(%rdi) | 
 | 	movdqu	%xmm15,48 + 0(%rdi) | 
 | 	movdqu	0 + 64(%rsi),%xmm3 | 
 | 	movdqu	16 + 64(%rsi),%xmm7 | 
 | 	movdqu	32 + 64(%rsi),%xmm11 | 
 | 	movdqu	48 + 64(%rsi),%xmm15 | 
 | 	pxor	%xmm3,%xmm1 | 
 | 	pxor	%xmm7,%xmm5 | 
 | 	pxor	%xmm11,%xmm9 | 
 | 	pxor	%xmm13,%xmm15 | 
 | 	movdqu	%xmm1,0 + 64(%rdi) | 
 | 	movdqu	%xmm5,16 + 64(%rdi) | 
 | 	movdqu	%xmm9,32 + 64(%rdi) | 
 | 	movdqu	%xmm15,48 + 64(%rdi) | 
 |  | 
 | 	subq	$128,%rbx | 
 | 	leaq	128(%rsi),%rsi | 
 | 	leaq	128(%rdi),%rdi | 
 | 	jmp	.Lopen_sse_tail_64_dec_loop | 
 |  | 
 | .Lopen_sse_tail_256: | 
 | 	movdqa	.Lchacha20_consts(%rip),%xmm0 | 
 | 	movdqa	0+48(%rbp),%xmm4 | 
 | 	movdqa	0+64(%rbp),%xmm8 | 
 | 	movdqa	%xmm0,%xmm1 | 
 | 	movdqa	%xmm4,%xmm5 | 
 | 	movdqa	%xmm8,%xmm9 | 
 | 	movdqa	%xmm0,%xmm2 | 
 | 	movdqa	%xmm4,%xmm6 | 
 | 	movdqa	%xmm8,%xmm10 | 
 | 	movdqa	%xmm0,%xmm3 | 
 | 	movdqa	%xmm4,%xmm7 | 
 | 	movdqa	%xmm8,%xmm11 | 
 | 	movdqa	0+96(%rbp),%xmm15 | 
 | 	paddd	.Lsse_inc(%rip),%xmm15 | 
 | 	movdqa	%xmm15,%xmm14 | 
 | 	paddd	.Lsse_inc(%rip),%xmm14 | 
 | 	movdqa	%xmm14,%xmm13 | 
 | 	paddd	.Lsse_inc(%rip),%xmm13 | 
 | 	movdqa	%xmm13,%xmm12 | 
 | 	paddd	.Lsse_inc(%rip),%xmm12 | 
 | 	movdqa	%xmm12,0+96(%rbp) | 
 | 	movdqa	%xmm13,0+112(%rbp) | 
 | 	movdqa	%xmm14,0+128(%rbp) | 
 | 	movdqa	%xmm15,0+144(%rbp) | 
 |  | 
 | 	xorq	%r8,%r8 | 
 | .Lopen_sse_tail_256_rounds_and_x1hash: | 
 | 	addq	0+0(%rsi,%r8,1),%r10 | 
 | 	adcq	8+0(%rsi,%r8,1),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movdqa	%xmm11,0+80(%rbp) | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm0,%xmm12 | 
 | 	pshufb	.Lrol16(%rip),%xmm12 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm4,%xmm11 | 
 | 	pslld	$12,%xmm11 | 
 | 	psrld	$20,%xmm4 | 
 | 	pxor	%xmm11,%xmm4 | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm0,%xmm12 | 
 | 	pshufb	.Lrol8(%rip),%xmm12 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm4,%xmm11 | 
 | 	pslld	$7,%xmm11 | 
 | 	psrld	$25,%xmm4 | 
 | 	pxor	%xmm11,%xmm4 | 
 | .byte	102,15,58,15,228,4 | 
 | .byte	102,69,15,58,15,192,8 | 
 | .byte	102,69,15,58,15,228,12 | 
 | 	paddd	%xmm5,%xmm1 | 
 | 	pxor	%xmm1,%xmm13 | 
 | 	pshufb	.Lrol16(%rip),%xmm13 | 
 | 	paddd	%xmm13,%xmm9 | 
 | 	pxor	%xmm9,%xmm5 | 
 | 	movdqa	%xmm5,%xmm11 | 
 | 	pslld	$12,%xmm11 | 
 | 	psrld	$20,%xmm5 | 
 | 	pxor	%xmm11,%xmm5 | 
 | 	paddd	%xmm5,%xmm1 | 
 | 	pxor	%xmm1,%xmm13 | 
 | 	pshufb	.Lrol8(%rip),%xmm13 | 
 | 	paddd	%xmm13,%xmm9 | 
 | 	pxor	%xmm9,%xmm5 | 
 | 	movdqa	%xmm5,%xmm11 | 
 | 	pslld	$7,%xmm11 | 
 | 	psrld	$25,%xmm5 | 
 | 	pxor	%xmm11,%xmm5 | 
 | .byte	102,15,58,15,237,4 | 
 | .byte	102,69,15,58,15,201,8 | 
 | .byte	102,69,15,58,15,237,12 | 
 | 	paddd	%xmm6,%xmm2 | 
 | 	pxor	%xmm2,%xmm14 | 
 | 	pshufb	.Lrol16(%rip),%xmm14 | 
 | 	paddd	%xmm14,%xmm10 | 
 | 	pxor	%xmm10,%xmm6 | 
 | 	movdqa	%xmm6,%xmm11 | 
 | 	pslld	$12,%xmm11 | 
 | 	psrld	$20,%xmm6 | 
 | 	pxor	%xmm11,%xmm6 | 
 | 	paddd	%xmm6,%xmm2 | 
 | 	pxor	%xmm2,%xmm14 | 
 | 	pshufb	.Lrol8(%rip),%xmm14 | 
 | 	paddd	%xmm14,%xmm10 | 
 | 	pxor	%xmm10,%xmm6 | 
 | 	movdqa	%xmm6,%xmm11 | 
 | 	pslld	$7,%xmm11 | 
 | 	psrld	$25,%xmm6 | 
 | 	pxor	%xmm11,%xmm6 | 
 | .byte	102,15,58,15,246,4 | 
 | .byte	102,69,15,58,15,210,8 | 
 | .byte	102,69,15,58,15,246,12 | 
 | 	movdqa	0+80(%rbp),%xmm11 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movdqa	%xmm9,0+80(%rbp) | 
 | 	paddd	%xmm7,%xmm3 | 
 | 	pxor	%xmm3,%xmm15 | 
 | 	pshufb	.Lrol16(%rip),%xmm15 | 
 | 	paddd	%xmm15,%xmm11 | 
 | 	pxor	%xmm11,%xmm7 | 
 | 	movdqa	%xmm7,%xmm9 | 
 | 	pslld	$12,%xmm9 | 
 | 	psrld	$20,%xmm7 | 
 | 	pxor	%xmm9,%xmm7 | 
 | 	paddd	%xmm7,%xmm3 | 
 | 	pxor	%xmm3,%xmm15 | 
 | 	pshufb	.Lrol8(%rip),%xmm15 | 
 | 	paddd	%xmm15,%xmm11 | 
 | 	pxor	%xmm11,%xmm7 | 
 | 	movdqa	%xmm7,%xmm9 | 
 | 	pslld	$7,%xmm9 | 
 | 	psrld	$25,%xmm7 | 
 | 	pxor	%xmm9,%xmm7 | 
 | .byte	102,15,58,15,255,4 | 
 | .byte	102,69,15,58,15,219,8 | 
 | .byte	102,69,15,58,15,255,12 | 
 | 	movdqa	0+80(%rbp),%xmm9 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	movdqa	%xmm11,0+80(%rbp) | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm0,%xmm12 | 
 | 	pshufb	.Lrol16(%rip),%xmm12 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm4,%xmm11 | 
 | 	pslld	$12,%xmm11 | 
 | 	psrld	$20,%xmm4 | 
 | 	pxor	%xmm11,%xmm4 | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm0,%xmm12 | 
 | 	pshufb	.Lrol8(%rip),%xmm12 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm4,%xmm11 | 
 | 	pslld	$7,%xmm11 | 
 | 	psrld	$25,%xmm4 | 
 | 	pxor	%xmm11,%xmm4 | 
 | .byte	102,15,58,15,228,12 | 
 | .byte	102,69,15,58,15,192,8 | 
 | .byte	102,69,15,58,15,228,4 | 
 | 	paddd	%xmm5,%xmm1 | 
 | 	pxor	%xmm1,%xmm13 | 
 | 	pshufb	.Lrol16(%rip),%xmm13 | 
 | 	paddd	%xmm13,%xmm9 | 
 | 	pxor	%xmm9,%xmm5 | 
 | 	movdqa	%xmm5,%xmm11 | 
 | 	pslld	$12,%xmm11 | 
 | 	psrld	$20,%xmm5 | 
 | 	pxor	%xmm11,%xmm5 | 
 | 	paddd	%xmm5,%xmm1 | 
 | 	pxor	%xmm1,%xmm13 | 
 | 	pshufb	.Lrol8(%rip),%xmm13 | 
 | 	paddd	%xmm13,%xmm9 | 
 | 	pxor	%xmm9,%xmm5 | 
 | 	movdqa	%xmm5,%xmm11 | 
 | 	pslld	$7,%xmm11 | 
 | 	psrld	$25,%xmm5 | 
 | 	pxor	%xmm11,%xmm5 | 
 | .byte	102,15,58,15,237,12 | 
 | .byte	102,69,15,58,15,201,8 | 
 | .byte	102,69,15,58,15,237,4 | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	paddd	%xmm6,%xmm2 | 
 | 	pxor	%xmm2,%xmm14 | 
 | 	pshufb	.Lrol16(%rip),%xmm14 | 
 | 	paddd	%xmm14,%xmm10 | 
 | 	pxor	%xmm10,%xmm6 | 
 | 	movdqa	%xmm6,%xmm11 | 
 | 	pslld	$12,%xmm11 | 
 | 	psrld	$20,%xmm6 | 
 | 	pxor	%xmm11,%xmm6 | 
 | 	paddd	%xmm6,%xmm2 | 
 | 	pxor	%xmm2,%xmm14 | 
 | 	pshufb	.Lrol8(%rip),%xmm14 | 
 | 	paddd	%xmm14,%xmm10 | 
 | 	pxor	%xmm10,%xmm6 | 
 | 	movdqa	%xmm6,%xmm11 | 
 | 	pslld	$7,%xmm11 | 
 | 	psrld	$25,%xmm6 | 
 | 	pxor	%xmm11,%xmm6 | 
 | .byte	102,15,58,15,246,12 | 
 | .byte	102,69,15,58,15,210,8 | 
 | .byte	102,69,15,58,15,246,4 | 
 | 	movdqa	0+80(%rbp),%xmm11 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 | 	movdqa	%xmm9,0+80(%rbp) | 
 | 	paddd	%xmm7,%xmm3 | 
 | 	pxor	%xmm3,%xmm15 | 
 | 	pshufb	.Lrol16(%rip),%xmm15 | 
 | 	paddd	%xmm15,%xmm11 | 
 | 	pxor	%xmm11,%xmm7 | 
 | 	movdqa	%xmm7,%xmm9 | 
 | 	pslld	$12,%xmm9 | 
 | 	psrld	$20,%xmm7 | 
 | 	pxor	%xmm9,%xmm7 | 
 | 	paddd	%xmm7,%xmm3 | 
 | 	pxor	%xmm3,%xmm15 | 
 | 	pshufb	.Lrol8(%rip),%xmm15 | 
 | 	paddd	%xmm15,%xmm11 | 
 | 	pxor	%xmm11,%xmm7 | 
 | 	movdqa	%xmm7,%xmm9 | 
 | 	pslld	$7,%xmm9 | 
 | 	psrld	$25,%xmm7 | 
 | 	pxor	%xmm9,%xmm7 | 
 | .byte	102,15,58,15,255,12 | 
 | .byte	102,69,15,58,15,219,8 | 
 | .byte	102,69,15,58,15,255,4 | 
 | 	movdqa	0+80(%rbp),%xmm9 | 
 |  | 
 | 	addq	$16,%r8 | 
 | 	cmpq	$160,%r8 | 
 | 	jb	.Lopen_sse_tail_256_rounds_and_x1hash | 
 |  | 
 | 	movq	%rbx,%rcx | 
 | 	andq	$-16,%rcx | 
 | .Lopen_sse_tail_256_hash: | 
 | 	addq	0+0(%rsi,%r8,1),%r10 | 
 | 	adcq	8+0(%rsi,%r8,1),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 |  | 
 | 	addq	$16,%r8 | 
 | 	cmpq	%rcx,%r8 | 
 | 	jb	.Lopen_sse_tail_256_hash | 
 | 	paddd	.Lchacha20_consts(%rip),%xmm3 | 
 | 	paddd	0+48(%rbp),%xmm7 | 
 | 	paddd	0+64(%rbp),%xmm11 | 
 | 	paddd	0+144(%rbp),%xmm15 | 
 | 	paddd	.Lchacha20_consts(%rip),%xmm2 | 
 | 	paddd	0+48(%rbp),%xmm6 | 
 | 	paddd	0+64(%rbp),%xmm10 | 
 | 	paddd	0+128(%rbp),%xmm14 | 
 | 	paddd	.Lchacha20_consts(%rip),%xmm1 | 
 | 	paddd	0+48(%rbp),%xmm5 | 
 | 	paddd	0+64(%rbp),%xmm9 | 
 | 	paddd	0+112(%rbp),%xmm13 | 
 | 	paddd	.Lchacha20_consts(%rip),%xmm0 | 
 | 	paddd	0+48(%rbp),%xmm4 | 
 | 	paddd	0+64(%rbp),%xmm8 | 
 | 	paddd	0+96(%rbp),%xmm12 | 
 | 	movdqa	%xmm12,0+80(%rbp) | 
 | 	movdqu	0 + 0(%rsi),%xmm12 | 
 | 	pxor	%xmm3,%xmm12 | 
 | 	movdqu	%xmm12,0 + 0(%rdi) | 
 | 	movdqu	16 + 0(%rsi),%xmm12 | 
 | 	pxor	%xmm7,%xmm12 | 
 | 	movdqu	%xmm12,16 + 0(%rdi) | 
 | 	movdqu	32 + 0(%rsi),%xmm12 | 
 | 	pxor	%xmm11,%xmm12 | 
 | 	movdqu	%xmm12,32 + 0(%rdi) | 
 | 	movdqu	48 + 0(%rsi),%xmm12 | 
 | 	pxor	%xmm15,%xmm12 | 
 | 	movdqu	%xmm12,48 + 0(%rdi) | 
 | 	movdqu	0 + 64(%rsi),%xmm3 | 
 | 	movdqu	16 + 64(%rsi),%xmm7 | 
 | 	movdqu	32 + 64(%rsi),%xmm11 | 
 | 	movdqu	48 + 64(%rsi),%xmm15 | 
 | 	pxor	%xmm3,%xmm2 | 
 | 	pxor	%xmm7,%xmm6 | 
 | 	pxor	%xmm11,%xmm10 | 
 | 	pxor	%xmm14,%xmm15 | 
 | 	movdqu	%xmm2,0 + 64(%rdi) | 
 | 	movdqu	%xmm6,16 + 64(%rdi) | 
 | 	movdqu	%xmm10,32 + 64(%rdi) | 
 | 	movdqu	%xmm15,48 + 64(%rdi) | 
 | 	movdqu	0 + 128(%rsi),%xmm3 | 
 | 	movdqu	16 + 128(%rsi),%xmm7 | 
 | 	movdqu	32 + 128(%rsi),%xmm11 | 
 | 	movdqu	48 + 128(%rsi),%xmm15 | 
 | 	pxor	%xmm3,%xmm1 | 
 | 	pxor	%xmm7,%xmm5 | 
 | 	pxor	%xmm11,%xmm9 | 
 | 	pxor	%xmm13,%xmm15 | 
 | 	movdqu	%xmm1,0 + 128(%rdi) | 
 | 	movdqu	%xmm5,16 + 128(%rdi) | 
 | 	movdqu	%xmm9,32 + 128(%rdi) | 
 | 	movdqu	%xmm15,48 + 128(%rdi) | 
 |  | 
 | 	movdqa	0+80(%rbp),%xmm12 | 
 | 	subq	$192,%rbx | 
 | 	leaq	192(%rsi),%rsi | 
 | 	leaq	192(%rdi),%rdi | 
 |  | 
 |  | 
 | .Lopen_sse_tail_64_dec_loop: | 
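// Decrypt any remaining full 16-byte chunks with the keystream held in
// %xmm0/%xmm4/%xmm8/%xmm12 (16 bytes each), shifting the next piece into
// %xmm0 each pass.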
 | 	cmpq	$16,%rbx | 
 | 	jb	.Lopen_sse_tail_16_init | 
 | 	subq	$16,%rbx | 
 | 	movdqu	(%rsi),%xmm3 | 
 | 	pxor	%xmm3,%xmm0 | 
 | 	movdqu	%xmm0,(%rdi) | 
 | 	leaq	16(%rsi),%rsi | 
 | 	leaq	16(%rdi),%rdi | 
 | 	movdqa	%xmm4,%xmm0 | 
 | 	movdqa	%xmm8,%xmm4 | 
 | 	movdqa	%xmm12,%xmm8 | 
 | 	jmp	.Lopen_sse_tail_64_dec_loop | 
 | .Lopen_sse_tail_16_init: | 
 | 	movdqa	%xmm0,%xmm1 | 
 |  | 
 |  | 
 | .Lopen_sse_tail_16: | 
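// Final partial block: gather the last ciphertext bytes into %xmm3, save them
// for hashing (the .byte below is movq %xmm3,%r13), xor with the keystream in
// %xmm1 and store byte by byte, then absorb the saved bytes into Poly1305.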
 | 	testq	%rbx,%rbx | 
 | 	jz	.Lopen_sse_finalize | 
 |  | 
 |  | 
 |  | 
 | 	pxor	%xmm3,%xmm3 | 
 | 	leaq	-1(%rsi,%rbx,1),%rsi | 
 | 	movq	%rbx,%r8 | 
 | .Lopen_sse_tail_16_compose: | 
 | 	pslldq	$1,%xmm3 | 
 | 	pinsrb	$0,(%rsi),%xmm3 | 
 | 	subq	$1,%rsi | 
 | 	subq	$1,%r8 | 
 | 	jnz	.Lopen_sse_tail_16_compose | 
 |  | 
 | .byte	102,73,15,126,221 | 
 | 	pextrq	$1,%xmm3,%r14 | 
 |  | 
 | 	pxor	%xmm1,%xmm3 | 
 |  | 
 |  | 
 | .Lopen_sse_tail_16_extract: | 
 | 	pextrb	$0,%xmm3,(%rdi) | 
 | 	psrldq	$1,%xmm3 | 
 | 	addq	$1,%rdi | 
 | 	subq	$1,%rbx | 
 | 	jne	.Lopen_sse_tail_16_extract | 
 |  | 
 | 	addq	%r13,%r10 | 
 | 	adcq	%r14,%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 |  | 
 |  | 
 | .Lopen_sse_finalize: | 
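// Finalize: hash the AD and ciphertext lengths stored at 32(%rbp)/40(%rbp),
// do the final reduction mod 2^130-5, add "s" from 16(%rbp), and write the
// tag through the pointer passed in %r9.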
 | 	addq	0+0+32(%rbp),%r10 | 
 | 	adcq	8+0+32(%rbp),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 |  | 
 |  | 
 | 	movq	%r10,%r13 | 
 | 	movq	%r11,%r14 | 
 | 	movq	%r12,%r15 | 
 | 	subq	$-5,%r10 | 
 | 	sbbq	$-1,%r11 | 
 | 	sbbq	$3,%r12 | 
 | 	cmovcq	%r13,%r10 | 
 | 	cmovcq	%r14,%r11 | 
 | 	cmovcq	%r15,%r12 | 
 |  | 
 | 	addq	0+0+16(%rbp),%r10 | 
 | 	adcq	8+0+16(%rbp),%r11 | 
 |  | 
 | .cfi_remember_state	 | 
 | 	addq	$288 + 0 + 32,%rsp | 
 | .cfi_adjust_cfa_offset	-(288 + 32) | 
 |  | 
 | 	popq	%r9 | 
 | .cfi_adjust_cfa_offset	-8 | 
 | .cfi_restore	%r9 | 
 | 	movq	%r10,(%r9) | 
 | 	movq	%r11,8(%r9) | 
 | 	popq	%r15 | 
 | .cfi_adjust_cfa_offset	-8 | 
 | .cfi_restore	%r15 | 
 | 	popq	%r14 | 
 | .cfi_adjust_cfa_offset	-8 | 
 | .cfi_restore	%r14 | 
 | 	popq	%r13 | 
 | .cfi_adjust_cfa_offset	-8 | 
 | .cfi_restore	%r13 | 
 | 	popq	%r12 | 
 | .cfi_adjust_cfa_offset	-8 | 
 | .cfi_restore	%r12 | 
 | 	popq	%rbx | 
 | .cfi_adjust_cfa_offset	-8 | 
 | .cfi_restore	%rbx | 
 | 	popq	%rbp | 
 | .cfi_adjust_cfa_offset	-8 | 
 | .cfi_restore	%rbp | 
 | 	ret | 
 |  | 
 | .Lopen_sse_128: | 
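// Short-input path (at most 128 bytes): three ChaCha20 blocks are kept
// entirely in registers; the counter+0 block supplies the Poly1305 key and the
// other two the keystream.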
 | .cfi_restore_state	 | 
 | 	movdqu	.Lchacha20_consts(%rip),%xmm0 | 
 | 	movdqa	%xmm0,%xmm1 | 
 | 	movdqa	%xmm0,%xmm2 | 
 | 	movdqu	0(%r9),%xmm4 | 
 | 	movdqa	%xmm4,%xmm5 | 
 | 	movdqa	%xmm4,%xmm6 | 
 | 	movdqu	16(%r9),%xmm8 | 
 | 	movdqa	%xmm8,%xmm9 | 
 | 	movdqa	%xmm8,%xmm10 | 
 | 	movdqu	32(%r9),%xmm12 | 
 | 	movdqa	%xmm12,%xmm13 | 
 | 	paddd	.Lsse_inc(%rip),%xmm13 | 
 | 	movdqa	%xmm13,%xmm14 | 
 | 	paddd	.Lsse_inc(%rip),%xmm14 | 
 | 	movdqa	%xmm4,%xmm7 | 
 | 	movdqa	%xmm8,%xmm11 | 
 | 	movdqa	%xmm13,%xmm15 | 
 | 	movq	$10,%r10 | 
 |  | 
 | .Lopen_sse_128_rounds: | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm0,%xmm12 | 
 | 	pshufb	.Lrol16(%rip),%xmm12 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm4,%xmm3 | 
 | 	pslld	$12,%xmm3 | 
 | 	psrld	$20,%xmm4 | 
 | 	pxor	%xmm3,%xmm4 | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm0,%xmm12 | 
 | 	pshufb	.Lrol8(%rip),%xmm12 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm4,%xmm3 | 
 | 	pslld	$7,%xmm3 | 
 | 	psrld	$25,%xmm4 | 
 | 	pxor	%xmm3,%xmm4 | 
 | .byte	102,15,58,15,228,4 | 
 | .byte	102,69,15,58,15,192,8 | 
 | .byte	102,69,15,58,15,228,12 | 
 | 	paddd	%xmm5,%xmm1 | 
 | 	pxor	%xmm1,%xmm13 | 
 | 	pshufb	.Lrol16(%rip),%xmm13 | 
 | 	paddd	%xmm13,%xmm9 | 
 | 	pxor	%xmm9,%xmm5 | 
 | 	movdqa	%xmm5,%xmm3 | 
 | 	pslld	$12,%xmm3 | 
 | 	psrld	$20,%xmm5 | 
 | 	pxor	%xmm3,%xmm5 | 
 | 	paddd	%xmm5,%xmm1 | 
 | 	pxor	%xmm1,%xmm13 | 
 | 	pshufb	.Lrol8(%rip),%xmm13 | 
 | 	paddd	%xmm13,%xmm9 | 
 | 	pxor	%xmm9,%xmm5 | 
 | 	movdqa	%xmm5,%xmm3 | 
 | 	pslld	$7,%xmm3 | 
 | 	psrld	$25,%xmm5 | 
 | 	pxor	%xmm3,%xmm5 | 
 | .byte	102,15,58,15,237,4 | 
 | .byte	102,69,15,58,15,201,8 | 
 | .byte	102,69,15,58,15,237,12 | 
 | 	paddd	%xmm6,%xmm2 | 
 | 	pxor	%xmm2,%xmm14 | 
 | 	pshufb	.Lrol16(%rip),%xmm14 | 
 | 	paddd	%xmm14,%xmm10 | 
 | 	pxor	%xmm10,%xmm6 | 
 | 	movdqa	%xmm6,%xmm3 | 
 | 	pslld	$12,%xmm3 | 
 | 	psrld	$20,%xmm6 | 
 | 	pxor	%xmm3,%xmm6 | 
 | 	paddd	%xmm6,%xmm2 | 
 | 	pxor	%xmm2,%xmm14 | 
 | 	pshufb	.Lrol8(%rip),%xmm14 | 
 | 	paddd	%xmm14,%xmm10 | 
 | 	pxor	%xmm10,%xmm6 | 
 | 	movdqa	%xmm6,%xmm3 | 
 | 	pslld	$7,%xmm3 | 
 | 	psrld	$25,%xmm6 | 
 | 	pxor	%xmm3,%xmm6 | 
 | .byte	102,15,58,15,246,4 | 
 | .byte	102,69,15,58,15,210,8 | 
 | .byte	102,69,15,58,15,246,12 | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm0,%xmm12 | 
 | 	pshufb	.Lrol16(%rip),%xmm12 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm4,%xmm3 | 
 | 	pslld	$12,%xmm3 | 
 | 	psrld	$20,%xmm4 | 
 | 	pxor	%xmm3,%xmm4 | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm0,%xmm12 | 
 | 	pshufb	.Lrol8(%rip),%xmm12 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm4,%xmm3 | 
 | 	pslld	$7,%xmm3 | 
 | 	psrld	$25,%xmm4 | 
 | 	pxor	%xmm3,%xmm4 | 
 | .byte	102,15,58,15,228,12 | 
 | .byte	102,69,15,58,15,192,8 | 
 | .byte	102,69,15,58,15,228,4 | 
 | 	paddd	%xmm5,%xmm1 | 
 | 	pxor	%xmm1,%xmm13 | 
 | 	pshufb	.Lrol16(%rip),%xmm13 | 
 | 	paddd	%xmm13,%xmm9 | 
 | 	pxor	%xmm9,%xmm5 | 
 | 	movdqa	%xmm5,%xmm3 | 
 | 	pslld	$12,%xmm3 | 
 | 	psrld	$20,%xmm5 | 
 | 	pxor	%xmm3,%xmm5 | 
 | 	paddd	%xmm5,%xmm1 | 
 | 	pxor	%xmm1,%xmm13 | 
 | 	pshufb	.Lrol8(%rip),%xmm13 | 
 | 	paddd	%xmm13,%xmm9 | 
 | 	pxor	%xmm9,%xmm5 | 
 | 	movdqa	%xmm5,%xmm3 | 
 | 	pslld	$7,%xmm3 | 
 | 	psrld	$25,%xmm5 | 
 | 	pxor	%xmm3,%xmm5 | 
 | .byte	102,15,58,15,237,12 | 
 | .byte	102,69,15,58,15,201,8 | 
 | .byte	102,69,15,58,15,237,4 | 
 | 	paddd	%xmm6,%xmm2 | 
 | 	pxor	%xmm2,%xmm14 | 
 | 	pshufb	.Lrol16(%rip),%xmm14 | 
 | 	paddd	%xmm14,%xmm10 | 
 | 	pxor	%xmm10,%xmm6 | 
 | 	movdqa	%xmm6,%xmm3 | 
 | 	pslld	$12,%xmm3 | 
 | 	psrld	$20,%xmm6 | 
 | 	pxor	%xmm3,%xmm6 | 
 | 	paddd	%xmm6,%xmm2 | 
 | 	pxor	%xmm2,%xmm14 | 
 | 	pshufb	.Lrol8(%rip),%xmm14 | 
 | 	paddd	%xmm14,%xmm10 | 
 | 	pxor	%xmm10,%xmm6 | 
 | 	movdqa	%xmm6,%xmm3 | 
 | 	pslld	$7,%xmm3 | 
 | 	psrld	$25,%xmm6 | 
 | 	pxor	%xmm3,%xmm6 | 
 | .byte	102,15,58,15,246,12 | 
 | .byte	102,69,15,58,15,210,8 | 
 | .byte	102,69,15,58,15,246,4 | 
 |  | 
 | 	decq	%r10 | 
 | 	jnz	.Lopen_sse_128_rounds | 
 | 	paddd	.Lchacha20_consts(%rip),%xmm0 | 
 | 	paddd	.Lchacha20_consts(%rip),%xmm1 | 
 | 	paddd	.Lchacha20_consts(%rip),%xmm2 | 
 | 	paddd	%xmm7,%xmm4 | 
 | 	paddd	%xmm7,%xmm5 | 
 | 	paddd	%xmm7,%xmm6 | 
 | 	paddd	%xmm11,%xmm9 | 
 | 	paddd	%xmm11,%xmm10 | 
 | 	paddd	%xmm15,%xmm13 | 
 | 	paddd	.Lsse_inc(%rip),%xmm15 | 
 | 	paddd	%xmm15,%xmm14 | 
 |  | 
 | 	pand	.Lclamp(%rip),%xmm0 | 
 | 	movdqa	%xmm0,0+0(%rbp) | 
 | 	movdqa	%xmm4,0+16(%rbp) | 
 |  | 
 | 	movq	%r8,%r8 | 
 | 	call	poly_hash_ad_internal | 
 | .Lopen_sse_128_xor_hash: | 
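// Hash 16 bytes of ciphertext, decrypt them with the next 16 bytes of
// keystream (%xmm1), and rotate the remaining keystream registers down; loop
// while at least 16 bytes remain.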
 | 	cmpq	$16,%rbx | 
 | 	jb	.Lopen_sse_tail_16 | 
 | 	subq	$16,%rbx | 
 | 	addq	0+0(%rsi),%r10 | 
 | 	adcq	8+0(%rsi),%r11 | 
 | 	adcq	$1,%r12 | 
 |  | 
 |  | 
 | 	movdqu	0(%rsi),%xmm3 | 
 | 	pxor	%xmm3,%xmm1 | 
 | 	movdqu	%xmm1,0(%rdi) | 
 | 	leaq	16(%rsi),%rsi | 
 | 	leaq	16(%rdi),%rdi | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 |  | 
 |  | 
 | 	movdqa	%xmm5,%xmm1 | 
 | 	movdqa	%xmm9,%xmm5 | 
 | 	movdqa	%xmm13,%xmm9 | 
 | 	movdqa	%xmm2,%xmm13 | 
 | 	movdqa	%xmm6,%xmm2 | 
 | 	movdqa	%xmm10,%xmm6 | 
 | 	movdqa	%xmm14,%xmm10 | 
 | 	jmp	.Lopen_sse_128_xor_hash | 
 | .size	chacha20_poly1305_open_sse41, .-chacha20_poly1305_open_sse41 | 
 | .cfi_endproc	 | 
 |  | 
 |  | 
 |  | 
 |  | 
 |  | 
 |  | 
 |  | 
 | .globl	chacha20_poly1305_seal_sse41 | 
 | .hidden chacha20_poly1305_seal_sse41 | 
 | .type	chacha20_poly1305_seal_sse41,@function | 
 | .align	64 | 
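// chacha20_poly1305_seal_sse41: same argument layout as the open routine
// (out %rdi, in %rsi, plaintext_len %rdx, ad %rcx, ad_len %r8, key/counter/
// nonce block %r9); the extra length at 56(%r9) is added to %rdx to form the
// total ciphertext length hashed into the tag.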
 | chacha20_poly1305_seal_sse41: | 
 | .cfi_startproc	 | 
 | _CET_ENDBR | 
 | 	pushq	%rbp | 
 | .cfi_adjust_cfa_offset	8 | 
 | .cfi_offset	%rbp,-16 | 
 | 	pushq	%rbx | 
 | .cfi_adjust_cfa_offset	8 | 
 | .cfi_offset	%rbx,-24 | 
 | 	pushq	%r12 | 
 | .cfi_adjust_cfa_offset	8 | 
 | .cfi_offset	%r12,-32 | 
 | 	pushq	%r13 | 
 | .cfi_adjust_cfa_offset	8 | 
 | .cfi_offset	%r13,-40 | 
 | 	pushq	%r14 | 
 | .cfi_adjust_cfa_offset	8 | 
 | .cfi_offset	%r14,-48 | 
 | 	pushq	%r15 | 
 | .cfi_adjust_cfa_offset	8 | 
 | .cfi_offset	%r15,-56 | 
 |  | 
 |  | 
 | 	pushq	%r9 | 
 | .cfi_adjust_cfa_offset	8 | 
 | .cfi_offset	%r9,-64 | 
 | 	subq	$288 + 0 + 32,%rsp | 
 | .cfi_adjust_cfa_offset	288 + 32 | 
 | 	leaq	32(%rsp),%rbp | 
 | 	andq	$-32,%rbp | 
 |  | 
 | 	movq	56(%r9),%rbx | 
 | 	addq	%rdx,%rbx | 
 | 	movq	%r8,0+0+32(%rbp) | 
 | 	movq	%rbx,8+0+32(%rbp) | 
 | 	movq	%rdx,%rbx | 
 |  | 
 | 	cmpq	$128,%rbx | 
 | 	jbe	.Lseal_sse_128 | 
 |  | 
 | 	movdqa	.Lchacha20_consts(%rip),%xmm0 | 
 | 	movdqu	0(%r9),%xmm4 | 
 | 	movdqu	16(%r9),%xmm8 | 
 | 	movdqu	32(%r9),%xmm12 | 
 |  | 
 | 	movdqa	%xmm0,%xmm1 | 
 | 	movdqa	%xmm0,%xmm2 | 
 | 	movdqa	%xmm0,%xmm3 | 
 | 	movdqa	%xmm4,%xmm5 | 
 | 	movdqa	%xmm4,%xmm6 | 
 | 	movdqa	%xmm4,%xmm7 | 
 | 	movdqa	%xmm8,%xmm9 | 
 | 	movdqa	%xmm8,%xmm10 | 
 | 	movdqa	%xmm8,%xmm11 | 
 | 	movdqa	%xmm12,%xmm15 | 
 | 	paddd	.Lsse_inc(%rip),%xmm12 | 
 | 	movdqa	%xmm12,%xmm14 | 
 | 	paddd	.Lsse_inc(%rip),%xmm12 | 
 | 	movdqa	%xmm12,%xmm13 | 
 | 	paddd	.Lsse_inc(%rip),%xmm12 | 
 |  | 
 | 	movdqa	%xmm4,0+48(%rbp) | 
 | 	movdqa	%xmm8,0+64(%rbp) | 
 | 	movdqa	%xmm12,0+96(%rbp) | 
 | 	movdqa	%xmm13,0+112(%rbp) | 
 | 	movdqa	%xmm14,0+128(%rbp) | 
 | 	movdqa	%xmm15,0+144(%rbp) | 
 | 	movq	$10,%r10 | 
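// First pass: 10 double-rounds over four parallel blocks; the
// %xmm3/%xmm7/%xmm11/%xmm15 column keeps the un-incremented counter and is
// the block the Poly1305 key is derived from.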
 | .Lseal_sse_init_rounds: | 
 | 	movdqa	%xmm8,0+80(%rbp) | 
 | 	movdqa	.Lrol16(%rip),%xmm8 | 
 | 	paddd	%xmm7,%xmm3 | 
 | 	paddd	%xmm6,%xmm2 | 
 | 	paddd	%xmm5,%xmm1 | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm3,%xmm15 | 
 | 	pxor	%xmm2,%xmm14 | 
 | 	pxor	%xmm1,%xmm13 | 
 | 	pxor	%xmm0,%xmm12 | 
 | .byte	102,69,15,56,0,248 | 
 | .byte	102,69,15,56,0,240 | 
 | .byte	102,69,15,56,0,232 | 
 | .byte	102,69,15,56,0,224 | 
 | 	movdqa	0+80(%rbp),%xmm8 | 
 | 	paddd	%xmm15,%xmm11 | 
 | 	paddd	%xmm14,%xmm10 | 
 | 	paddd	%xmm13,%xmm9 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm11,%xmm7 | 
 | 	pxor	%xmm10,%xmm6 | 
 | 	pxor	%xmm9,%xmm5 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm8,0+80(%rbp) | 
 | 	movdqa	%xmm7,%xmm8 | 
 | 	psrld	$20,%xmm8 | 
 | 	pslld	$32-20,%xmm7 | 
 | 	pxor	%xmm8,%xmm7 | 
 | 	movdqa	%xmm6,%xmm8 | 
 | 	psrld	$20,%xmm8 | 
 | 	pslld	$32-20,%xmm6 | 
 | 	pxor	%xmm8,%xmm6 | 
 | 	movdqa	%xmm5,%xmm8 | 
 | 	psrld	$20,%xmm8 | 
 | 	pslld	$32-20,%xmm5 | 
 | 	pxor	%xmm8,%xmm5 | 
 | 	movdqa	%xmm4,%xmm8 | 
 | 	psrld	$20,%xmm8 | 
 | 	pslld	$32-20,%xmm4 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	.Lrol8(%rip),%xmm8 | 
 | 	paddd	%xmm7,%xmm3 | 
 | 	paddd	%xmm6,%xmm2 | 
 | 	paddd	%xmm5,%xmm1 | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm3,%xmm15 | 
 | 	pxor	%xmm2,%xmm14 | 
 | 	pxor	%xmm1,%xmm13 | 
 | 	pxor	%xmm0,%xmm12 | 
 | .byte	102,69,15,56,0,248 | 
 | .byte	102,69,15,56,0,240 | 
 | .byte	102,69,15,56,0,232 | 
 | .byte	102,69,15,56,0,224 | 
 | 	movdqa	0+80(%rbp),%xmm8 | 
 | 	paddd	%xmm15,%xmm11 | 
 | 	paddd	%xmm14,%xmm10 | 
 | 	paddd	%xmm13,%xmm9 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm11,%xmm7 | 
 | 	pxor	%xmm10,%xmm6 | 
 | 	pxor	%xmm9,%xmm5 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm8,0+80(%rbp) | 
 | 	movdqa	%xmm7,%xmm8 | 
 | 	psrld	$25,%xmm8 | 
 | 	pslld	$32-25,%xmm7 | 
 | 	pxor	%xmm8,%xmm7 | 
 | 	movdqa	%xmm6,%xmm8 | 
 | 	psrld	$25,%xmm8 | 
 | 	pslld	$32-25,%xmm6 | 
 | 	pxor	%xmm8,%xmm6 | 
 | 	movdqa	%xmm5,%xmm8 | 
 | 	psrld	$25,%xmm8 | 
 | 	pslld	$32-25,%xmm5 | 
 | 	pxor	%xmm8,%xmm5 | 
 | 	movdqa	%xmm4,%xmm8 | 
 | 	psrld	$25,%xmm8 | 
 | 	pslld	$32-25,%xmm4 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	0+80(%rbp),%xmm8 | 
 | .byte	102,15,58,15,255,4 | 
 | .byte	102,69,15,58,15,219,8 | 
 | .byte	102,69,15,58,15,255,12 | 
 | .byte	102,15,58,15,246,4 | 
 | .byte	102,69,15,58,15,210,8 | 
 | .byte	102,69,15,58,15,246,12 | 
 | .byte	102,15,58,15,237,4 | 
 | .byte	102,69,15,58,15,201,8 | 
 | .byte	102,69,15,58,15,237,12 | 
 | .byte	102,15,58,15,228,4 | 
 | .byte	102,69,15,58,15,192,8 | 
 | .byte	102,69,15,58,15,228,12 | 
 | 	movdqa	%xmm8,0+80(%rbp) | 
 | 	movdqa	.Lrol16(%rip),%xmm8 | 
 | 	paddd	%xmm7,%xmm3 | 
 | 	paddd	%xmm6,%xmm2 | 
 | 	paddd	%xmm5,%xmm1 | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm3,%xmm15 | 
 | 	pxor	%xmm2,%xmm14 | 
 | 	pxor	%xmm1,%xmm13 | 
 | 	pxor	%xmm0,%xmm12 | 
 | .byte	102,69,15,56,0,248 | 
 | .byte	102,69,15,56,0,240 | 
 | .byte	102,69,15,56,0,232 | 
 | .byte	102,69,15,56,0,224 | 
 | 	movdqa	0+80(%rbp),%xmm8 | 
 | 	paddd	%xmm15,%xmm11 | 
 | 	paddd	%xmm14,%xmm10 | 
 | 	paddd	%xmm13,%xmm9 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm11,%xmm7 | 
 | 	pxor	%xmm10,%xmm6 | 
 | 	pxor	%xmm9,%xmm5 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm8,0+80(%rbp) | 
 | 	movdqa	%xmm7,%xmm8 | 
 | 	psrld	$20,%xmm8 | 
 | 	pslld	$32-20,%xmm7 | 
 | 	pxor	%xmm8,%xmm7 | 
 | 	movdqa	%xmm6,%xmm8 | 
 | 	psrld	$20,%xmm8 | 
 | 	pslld	$32-20,%xmm6 | 
 | 	pxor	%xmm8,%xmm6 | 
 | 	movdqa	%xmm5,%xmm8 | 
 | 	psrld	$20,%xmm8 | 
 | 	pslld	$32-20,%xmm5 | 
 | 	pxor	%xmm8,%xmm5 | 
 | 	movdqa	%xmm4,%xmm8 | 
 | 	psrld	$20,%xmm8 | 
 | 	pslld	$32-20,%xmm4 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	.Lrol8(%rip),%xmm8 | 
 | 	paddd	%xmm7,%xmm3 | 
 | 	paddd	%xmm6,%xmm2 | 
 | 	paddd	%xmm5,%xmm1 | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm3,%xmm15 | 
 | 	pxor	%xmm2,%xmm14 | 
 | 	pxor	%xmm1,%xmm13 | 
 | 	pxor	%xmm0,%xmm12 | 
 | .byte	102,69,15,56,0,248 | 
 | .byte	102,69,15,56,0,240 | 
 | .byte	102,69,15,56,0,232 | 
 | .byte	102,69,15,56,0,224 | 
 | 	movdqa	0+80(%rbp),%xmm8 | 
 | 	paddd	%xmm15,%xmm11 | 
 | 	paddd	%xmm14,%xmm10 | 
 | 	paddd	%xmm13,%xmm9 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm11,%xmm7 | 
 | 	pxor	%xmm10,%xmm6 | 
 | 	pxor	%xmm9,%xmm5 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm8,0+80(%rbp) | 
 | 	movdqa	%xmm7,%xmm8 | 
 | 	psrld	$25,%xmm8 | 
 | 	pslld	$32-25,%xmm7 | 
 | 	pxor	%xmm8,%xmm7 | 
 | 	movdqa	%xmm6,%xmm8 | 
 | 	psrld	$25,%xmm8 | 
 | 	pslld	$32-25,%xmm6 | 
 | 	pxor	%xmm8,%xmm6 | 
 | 	movdqa	%xmm5,%xmm8 | 
 | 	psrld	$25,%xmm8 | 
 | 	pslld	$32-25,%xmm5 | 
 | 	pxor	%xmm8,%xmm5 | 
 | 	movdqa	%xmm4,%xmm8 | 
 | 	psrld	$25,%xmm8 | 
 | 	pslld	$32-25,%xmm4 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	0+80(%rbp),%xmm8 | 
 | .byte	102,15,58,15,255,12 | 
 | .byte	102,69,15,58,15,219,8 | 
 | .byte	102,69,15,58,15,255,4 | 
 | .byte	102,15,58,15,246,12 | 
 | .byte	102,69,15,58,15,210,8 | 
 | .byte	102,69,15,58,15,246,4 | 
 | .byte	102,15,58,15,237,12 | 
 | .byte	102,69,15,58,15,201,8 | 
 | .byte	102,69,15,58,15,237,4 | 
 | .byte	102,15,58,15,228,12 | 
 | .byte	102,69,15,58,15,192,8 | 
 | .byte	102,69,15,58,15,228,4 | 
 |  | 
 | 	decq	%r10 | 
 | 	jnz	.Lseal_sse_init_rounds | 
 | 	paddd	.Lchacha20_consts(%rip),%xmm3 | 
 | 	paddd	0+48(%rbp),%xmm7 | 
 | 	paddd	0+64(%rbp),%xmm11 | 
 | 	paddd	0+144(%rbp),%xmm15 | 
 | 	paddd	.Lchacha20_consts(%rip),%xmm2 | 
 | 	paddd	0+48(%rbp),%xmm6 | 
 | 	paddd	0+64(%rbp),%xmm10 | 
 | 	paddd	0+128(%rbp),%xmm14 | 
 | 	paddd	.Lchacha20_consts(%rip),%xmm1 | 
 | 	paddd	0+48(%rbp),%xmm5 | 
 | 	paddd	0+64(%rbp),%xmm9 | 
 | 	paddd	0+112(%rbp),%xmm13 | 
 | 	paddd	.Lchacha20_consts(%rip),%xmm0 | 
 | 	paddd	0+48(%rbp),%xmm4 | 
 | 	paddd	0+64(%rbp),%xmm8 | 
 | 	paddd	0+96(%rbp),%xmm12 | 
 |  | 
 |  | 
 | 	pand	.Lclamp(%rip),%xmm3 | 
 | 	movdqa	%xmm3,0+0(%rbp) | 
 | 	movdqa	%xmm7,0+16(%rbp) | 
 |  | 
 | 	movq	%r8,%r8 | 
 | 	call	poly_hash_ad_internal | 
 | 	movdqu	0 + 0(%rsi),%xmm3 | 
 | 	movdqu	16 + 0(%rsi),%xmm7 | 
 | 	movdqu	32 + 0(%rsi),%xmm11 | 
 | 	movdqu	48 + 0(%rsi),%xmm15 | 
 | 	pxor	%xmm3,%xmm2 | 
 | 	pxor	%xmm7,%xmm6 | 
 | 	pxor	%xmm11,%xmm10 | 
 | 	pxor	%xmm14,%xmm15 | 
 | 	movdqu	%xmm2,0 + 0(%rdi) | 
 | 	movdqu	%xmm6,16 + 0(%rdi) | 
 | 	movdqu	%xmm10,32 + 0(%rdi) | 
 | 	movdqu	%xmm15,48 + 0(%rdi) | 
 | 	movdqu	0 + 64(%rsi),%xmm3 | 
 | 	movdqu	16 + 64(%rsi),%xmm7 | 
 | 	movdqu	32 + 64(%rsi),%xmm11 | 
 | 	movdqu	48 + 64(%rsi),%xmm15 | 
 | 	pxor	%xmm3,%xmm1 | 
 | 	pxor	%xmm7,%xmm5 | 
 | 	pxor	%xmm11,%xmm9 | 
 | 	pxor	%xmm13,%xmm15 | 
 | 	movdqu	%xmm1,0 + 64(%rdi) | 
 | 	movdqu	%xmm5,16 + 64(%rdi) | 
 | 	movdqu	%xmm9,32 + 64(%rdi) | 
 | 	movdqu	%xmm15,48 + 64(%rdi) | 
 |  | 
 | 	cmpq	$192,%rbx | 
 | 	ja	.Lseal_sse_main_init | 
 | 	movq	$128,%rcx | 
 | 	subq	$128,%rbx | 
 | 	leaq	128(%rsi),%rsi | 
 | 	jmp	.Lseal_sse_128_tail_hash | 
 | .Lseal_sse_main_init: | 
 | 	movdqu	0 + 128(%rsi),%xmm3 | 
 | 	movdqu	16 + 128(%rsi),%xmm7 | 
 | 	movdqu	32 + 128(%rsi),%xmm11 | 
 | 	movdqu	48 + 128(%rsi),%xmm15 | 
 | 	pxor	%xmm3,%xmm0 | 
 | 	pxor	%xmm7,%xmm4 | 
 | 	pxor	%xmm11,%xmm8 | 
 | 	pxor	%xmm12,%xmm15 | 
 | 	movdqu	%xmm0,0 + 128(%rdi) | 
 | 	movdqu	%xmm4,16 + 128(%rdi) | 
 | 	movdqu	%xmm8,32 + 128(%rdi) | 
 | 	movdqu	%xmm15,48 + 128(%rdi) | 
 |  | 
 | 	movq	$192,%rcx | 
 | 	subq	$192,%rbx | 
 | 	leaq	192(%rsi),%rsi | 
 | 	movq	$2,%rcx | 
 | 	movq	$8,%r8 | 
 | 	cmpq	$64,%rbx | 
 | 	jbe	.Lseal_sse_tail_64 | 
 | 	cmpq	$128,%rbx | 
 | 	jbe	.Lseal_sse_tail_128 | 
 | 	cmpq	$192,%rbx | 
 | 	jbe	.Lseal_sse_tail_192 | 
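// Main seal loop: each trip generates four more ChaCha20 blocks (256 bytes of
// keystream) while Poly1305 absorbs the ciphertext written on the previous
// trip.  Each pass of .Lseal_sse_main_rounds does one double-round and hashes
// one 16-byte block; %r8 and %rcx schedule the passes so the 10 double-rounds
// and the hashing of the prior 192/256 bytes finish together.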
 |  | 
 | .Lseal_sse_main_loop: | 
 | 	movdqa	.Lchacha20_consts(%rip),%xmm0 | 
 | 	movdqa	0+48(%rbp),%xmm4 | 
 | 	movdqa	0+64(%rbp),%xmm8 | 
 | 	movdqa	%xmm0,%xmm1 | 
 | 	movdqa	%xmm4,%xmm5 | 
 | 	movdqa	%xmm8,%xmm9 | 
 | 	movdqa	%xmm0,%xmm2 | 
 | 	movdqa	%xmm4,%xmm6 | 
 | 	movdqa	%xmm8,%xmm10 | 
 | 	movdqa	%xmm0,%xmm3 | 
 | 	movdqa	%xmm4,%xmm7 | 
 | 	movdqa	%xmm8,%xmm11 | 
 | 	movdqa	0+96(%rbp),%xmm15 | 
 | 	paddd	.Lsse_inc(%rip),%xmm15 | 
 | 	movdqa	%xmm15,%xmm14 | 
 | 	paddd	.Lsse_inc(%rip),%xmm14 | 
 | 	movdqa	%xmm14,%xmm13 | 
 | 	paddd	.Lsse_inc(%rip),%xmm13 | 
 | 	movdqa	%xmm13,%xmm12 | 
 | 	paddd	.Lsse_inc(%rip),%xmm12 | 
 | 	movdqa	%xmm12,0+96(%rbp) | 
 | 	movdqa	%xmm13,0+112(%rbp) | 
 | 	movdqa	%xmm14,0+128(%rbp) | 
 | 	movdqa	%xmm15,0+144(%rbp) | 
 |  | 
 | .align	32 | 
 | .Lseal_sse_main_rounds: | 
 | 	movdqa	%xmm8,0+80(%rbp) | 
 | 	movdqa	.Lrol16(%rip),%xmm8 | 
 | 	paddd	%xmm7,%xmm3 | 
 | 	paddd	%xmm6,%xmm2 | 
 | 	paddd	%xmm5,%xmm1 | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm3,%xmm15 | 
 | 	pxor	%xmm2,%xmm14 | 
 | 	pxor	%xmm1,%xmm13 | 
 | 	pxor	%xmm0,%xmm12 | 
 | .byte	102,69,15,56,0,248 | 
 | .byte	102,69,15,56,0,240 | 
 | .byte	102,69,15,56,0,232 | 
 | .byte	102,69,15,56,0,224 | 
 | 	movdqa	0+80(%rbp),%xmm8 | 
 | 	paddd	%xmm15,%xmm11 | 
 | 	paddd	%xmm14,%xmm10 | 
 | 	paddd	%xmm13,%xmm9 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm11,%xmm7 | 
 | 	addq	0+0(%rdi),%r10 | 
 | 	adcq	8+0(%rdi),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	pxor	%xmm10,%xmm6 | 
 | 	pxor	%xmm9,%xmm5 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm8,0+80(%rbp) | 
 | 	movdqa	%xmm7,%xmm8 | 
 | 	psrld	$20,%xmm8 | 
 | 	pslld	$32-20,%xmm7 | 
 | 	pxor	%xmm8,%xmm7 | 
 | 	movdqa	%xmm6,%xmm8 | 
 | 	psrld	$20,%xmm8 | 
 | 	pslld	$32-20,%xmm6 | 
 | 	pxor	%xmm8,%xmm6 | 
 | 	movdqa	%xmm5,%xmm8 | 
 | 	psrld	$20,%xmm8 | 
 | 	pslld	$32-20,%xmm5 | 
 | 	pxor	%xmm8,%xmm5 | 
 | 	movdqa	%xmm4,%xmm8 | 
 | 	psrld	$20,%xmm8 | 
 | 	pslld	$32-20,%xmm4 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movdqa	.Lrol8(%rip),%xmm8 | 
 | 	paddd	%xmm7,%xmm3 | 
 | 	paddd	%xmm6,%xmm2 | 
 | 	paddd	%xmm5,%xmm1 | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm3,%xmm15 | 
 | 	pxor	%xmm2,%xmm14 | 
 | 	pxor	%xmm1,%xmm13 | 
 | 	pxor	%xmm0,%xmm12 | 
 | .byte	102,69,15,56,0,248 | 
 | .byte	102,69,15,56,0,240 | 
 | .byte	102,69,15,56,0,232 | 
 | .byte	102,69,15,56,0,224 | 
 | 	movdqa	0+80(%rbp),%xmm8 | 
 | 	paddd	%xmm15,%xmm11 | 
 | 	paddd	%xmm14,%xmm10 | 
 | 	paddd	%xmm13,%xmm9 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm11,%xmm7 | 
 | 	pxor	%xmm10,%xmm6 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	pxor	%xmm9,%xmm5 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm8,0+80(%rbp) | 
 | 	movdqa	%xmm7,%xmm8 | 
 | 	psrld	$25,%xmm8 | 
 | 	pslld	$32-25,%xmm7 | 
 | 	pxor	%xmm8,%xmm7 | 
 | 	movdqa	%xmm6,%xmm8 | 
 | 	psrld	$25,%xmm8 | 
 | 	pslld	$32-25,%xmm6 | 
 | 	pxor	%xmm8,%xmm6 | 
 | 	movdqa	%xmm5,%xmm8 | 
 | 	psrld	$25,%xmm8 | 
 | 	pslld	$32-25,%xmm5 | 
 | 	pxor	%xmm8,%xmm5 | 
 | 	movdqa	%xmm4,%xmm8 | 
 | 	psrld	$25,%xmm8 | 
 | 	pslld	$32-25,%xmm4 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	0+80(%rbp),%xmm8 | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | .byte	102,15,58,15,255,4 | 
 | .byte	102,69,15,58,15,219,8 | 
 | .byte	102,69,15,58,15,255,12 | 
 | .byte	102,15,58,15,246,4 | 
 | .byte	102,69,15,58,15,210,8 | 
 | .byte	102,69,15,58,15,246,12 | 
 | .byte	102,15,58,15,237,4 | 
 | .byte	102,69,15,58,15,201,8 | 
 | .byte	102,69,15,58,15,237,12 | 
 | .byte	102,15,58,15,228,4 | 
 | .byte	102,69,15,58,15,192,8 | 
 | .byte	102,69,15,58,15,228,12 | 
 | 	movdqa	%xmm8,0+80(%rbp) | 
 | 	movdqa	.Lrol16(%rip),%xmm8 | 
 | 	paddd	%xmm7,%xmm3 | 
 | 	paddd	%xmm6,%xmm2 | 
 | 	paddd	%xmm5,%xmm1 | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm3,%xmm15 | 
 | 	pxor	%xmm2,%xmm14 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 | 	pxor	%xmm1,%xmm13 | 
 | 	pxor	%xmm0,%xmm12 | 
 | .byte	102,69,15,56,0,248 | 
 | .byte	102,69,15,56,0,240 | 
 | .byte	102,69,15,56,0,232 | 
 | .byte	102,69,15,56,0,224 | 
 | 	movdqa	0+80(%rbp),%xmm8 | 
 | 	paddd	%xmm15,%xmm11 | 
 | 	paddd	%xmm14,%xmm10 | 
 | 	paddd	%xmm13,%xmm9 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm11,%xmm7 | 
 | 	pxor	%xmm10,%xmm6 | 
 | 	pxor	%xmm9,%xmm5 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm8,0+80(%rbp) | 
 | 	movdqa	%xmm7,%xmm8 | 
 | 	psrld	$20,%xmm8 | 
 | 	pslld	$32-20,%xmm7 | 
 | 	pxor	%xmm8,%xmm7 | 
 | 	movdqa	%xmm6,%xmm8 | 
 | 	psrld	$20,%xmm8 | 
 | 	pslld	$32-20,%xmm6 | 
 | 	pxor	%xmm8,%xmm6 | 
 | 	movdqa	%xmm5,%xmm8 | 
 | 	psrld	$20,%xmm8 | 
 | 	pslld	$32-20,%xmm5 | 
 | 	pxor	%xmm8,%xmm5 | 
 | 	movdqa	%xmm4,%xmm8 | 
 | 	psrld	$20,%xmm8 | 
 | 	pslld	$32-20,%xmm4 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	.Lrol8(%rip),%xmm8 | 
 | 	paddd	%xmm7,%xmm3 | 
 | 	paddd	%xmm6,%xmm2 | 
 | 	paddd	%xmm5,%xmm1 | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm3,%xmm15 | 
 | 	pxor	%xmm2,%xmm14 | 
 | 	pxor	%xmm1,%xmm13 | 
 | 	pxor	%xmm0,%xmm12 | 
 | .byte	102,69,15,56,0,248 | 
 | .byte	102,69,15,56,0,240 | 
 | .byte	102,69,15,56,0,232 | 
 | .byte	102,69,15,56,0,224 | 
 | 	movdqa	0+80(%rbp),%xmm8 | 
 | 	paddd	%xmm15,%xmm11 | 
 | 	paddd	%xmm14,%xmm10 | 
 | 	paddd	%xmm13,%xmm9 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm11,%xmm7 | 
 | 	pxor	%xmm10,%xmm6 | 
 | 	pxor	%xmm9,%xmm5 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm8,0+80(%rbp) | 
 | 	movdqa	%xmm7,%xmm8 | 
 | 	psrld	$25,%xmm8 | 
 | 	pslld	$32-25,%xmm7 | 
 | 	pxor	%xmm8,%xmm7 | 
 | 	movdqa	%xmm6,%xmm8 | 
 | 	psrld	$25,%xmm8 | 
 | 	pslld	$32-25,%xmm6 | 
 | 	pxor	%xmm8,%xmm6 | 
 | 	movdqa	%xmm5,%xmm8 | 
 | 	psrld	$25,%xmm8 | 
 | 	pslld	$32-25,%xmm5 | 
 | 	pxor	%xmm8,%xmm5 | 
 | 	movdqa	%xmm4,%xmm8 | 
 | 	psrld	$25,%xmm8 | 
 | 	pslld	$32-25,%xmm4 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	0+80(%rbp),%xmm8 | 
 | .byte	102,15,58,15,255,12 | 
 | .byte	102,69,15,58,15,219,8 | 
 | .byte	102,69,15,58,15,255,4 | 
 | .byte	102,15,58,15,246,12 | 
 | .byte	102,69,15,58,15,210,8 | 
 | .byte	102,69,15,58,15,246,4 | 
 | .byte	102,15,58,15,237,12 | 
 | .byte	102,69,15,58,15,201,8 | 
 | .byte	102,69,15,58,15,237,4 | 
 | .byte	102,15,58,15,228,12 | 
 | .byte	102,69,15,58,15,192,8 | 
 | .byte	102,69,15,58,15,228,4 | 
 |  | 
 | 	leaq	16(%rdi),%rdi | 
 | 	decq	%r8 | 
 | 	jge	.Lseal_sse_main_rounds | 
 | 	addq	0+0(%rdi),%r10 | 
 | 	adcq	8+0(%rdi),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 |  | 
 | 	leaq	16(%rdi),%rdi | 
 | 	decq	%rcx | 
 | 	jg	.Lseal_sse_main_rounds | 
 | 	paddd	.Lchacha20_consts(%rip),%xmm3 | 
 | 	paddd	0+48(%rbp),%xmm7 | 
 | 	paddd	0+64(%rbp),%xmm11 | 
 | 	paddd	0+144(%rbp),%xmm15 | 
 | 	paddd	.Lchacha20_consts(%rip),%xmm2 | 
 | 	paddd	0+48(%rbp),%xmm6 | 
 | 	paddd	0+64(%rbp),%xmm10 | 
 | 	paddd	0+128(%rbp),%xmm14 | 
 | 	paddd	.Lchacha20_consts(%rip),%xmm1 | 
 | 	paddd	0+48(%rbp),%xmm5 | 
 | 	paddd	0+64(%rbp),%xmm9 | 
 | 	paddd	0+112(%rbp),%xmm13 | 
 | 	paddd	.Lchacha20_consts(%rip),%xmm0 | 
 | 	paddd	0+48(%rbp),%xmm4 | 
 | 	paddd	0+64(%rbp),%xmm8 | 
 | 	paddd	0+96(%rbp),%xmm12 | 
 |  | 
 | 	movdqa	%xmm14,0+80(%rbp) | 
 | 	movdqa	%xmm14,0+80(%rbp) | 
 | 	movdqu	0 + 0(%rsi),%xmm14 | 
 | 	pxor	%xmm3,%xmm14 | 
 | 	movdqu	%xmm14,0 + 0(%rdi) | 
 | 	movdqu	16 + 0(%rsi),%xmm14 | 
 | 	pxor	%xmm7,%xmm14 | 
 | 	movdqu	%xmm14,16 + 0(%rdi) | 
 | 	movdqu	32 + 0(%rsi),%xmm14 | 
 | 	pxor	%xmm11,%xmm14 | 
 | 	movdqu	%xmm14,32 + 0(%rdi) | 
 | 	movdqu	48 + 0(%rsi),%xmm14 | 
 | 	pxor	%xmm15,%xmm14 | 
 | 	movdqu	%xmm14,48 + 0(%rdi) | 
 |  | 
 | 	movdqa	0+80(%rbp),%xmm14 | 
 | 	movdqu	0 + 64(%rsi),%xmm3 | 
 | 	movdqu	16 + 64(%rsi),%xmm7 | 
 | 	movdqu	32 + 64(%rsi),%xmm11 | 
 | 	movdqu	48 + 64(%rsi),%xmm15 | 
 | 	pxor	%xmm3,%xmm2 | 
 | 	pxor	%xmm7,%xmm6 | 
 | 	pxor	%xmm11,%xmm10 | 
 | 	pxor	%xmm14,%xmm15 | 
 | 	movdqu	%xmm2,0 + 64(%rdi) | 
 | 	movdqu	%xmm6,16 + 64(%rdi) | 
 | 	movdqu	%xmm10,32 + 64(%rdi) | 
 | 	movdqu	%xmm15,48 + 64(%rdi) | 
 | 	movdqu	0 + 128(%rsi),%xmm3 | 
 | 	movdqu	16 + 128(%rsi),%xmm7 | 
 | 	movdqu	32 + 128(%rsi),%xmm11 | 
 | 	movdqu	48 + 128(%rsi),%xmm15 | 
 | 	pxor	%xmm3,%xmm1 | 
 | 	pxor	%xmm7,%xmm5 | 
 | 	pxor	%xmm11,%xmm9 | 
 | 	pxor	%xmm13,%xmm15 | 
 | 	movdqu	%xmm1,0 + 128(%rdi) | 
 | 	movdqu	%xmm5,16 + 128(%rdi) | 
 | 	movdqu	%xmm9,32 + 128(%rdi) | 
 | 	movdqu	%xmm15,48 + 128(%rdi) | 
 |  | 
 | 	cmpq	$256,%rbx | 
 | 	ja	.Lseal_sse_main_loop_xor | 
 |  | 
 | 	movq	$192,%rcx | 
 | 	subq	$192,%rbx | 
 | 	leaq	192(%rsi),%rsi | 
 | 	jmp	.Lseal_sse_128_tail_hash | 
 | .Lseal_sse_main_loop_xor: | 
 | 	movdqu	0 + 192(%rsi),%xmm3 | 
 | 	movdqu	16 + 192(%rsi),%xmm7 | 
 | 	movdqu	32 + 192(%rsi),%xmm11 | 
 | 	movdqu	48 + 192(%rsi),%xmm15 | 
 | 	pxor	%xmm3,%xmm0 | 
 | 	pxor	%xmm7,%xmm4 | 
 | 	pxor	%xmm11,%xmm8 | 
 | 	pxor	%xmm12,%xmm15 | 
 | 	movdqu	%xmm0,0 + 192(%rdi) | 
 | 	movdqu	%xmm4,16 + 192(%rdi) | 
 | 	movdqu	%xmm8,32 + 192(%rdi) | 
 | 	movdqu	%xmm15,48 + 192(%rdi) | 
 |  | 
 | 	leaq	256(%rsi),%rsi | 
 | 	subq	$256,%rbx | 
 | 	movq	$6,%rcx | 
 | 	movq	$4,%r8 | 
 | 	cmpq	$192,%rbx | 
 | 	jg	.Lseal_sse_main_loop | 
 | 	movq	%rbx,%rcx | 
 | 	testq	%rbx,%rbx | 
 | 	je	.Lseal_sse_128_tail_hash | 
 | 	movq	$6,%rcx | 
 | 	cmpq	$128,%rbx | 
 | 	ja	.Lseal_sse_tail_192 | 
 | 	cmpq	$64,%rbx | 
 | 	ja	.Lseal_sse_tail_128 | 
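// Tail paths: generate one, two or three final keystream blocks for the last
// at-most-192 bytes while the rounds keep absorbing earlier ciphertext.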
 |  | 
 | .Lseal_sse_tail_64: | 
 | 	movdqa	.Lchacha20_consts(%rip),%xmm0 | 
 | 	movdqa	0+48(%rbp),%xmm4 | 
 | 	movdqa	0+64(%rbp),%xmm8 | 
 | 	movdqa	0+96(%rbp),%xmm12 | 
 | 	paddd	.Lsse_inc(%rip),%xmm12 | 
 | 	movdqa	%xmm12,0+96(%rbp) | 
 |  | 
 | .Lseal_sse_tail_64_rounds_and_x2hash: | 
 | 	addq	0+0(%rdi),%r10 | 
 | 	adcq	8+0(%rdi),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 |  | 
 | 	leaq	16(%rdi),%rdi | 
 | .Lseal_sse_tail_64_rounds_and_x1hash: | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm0,%xmm12 | 
 | 	pshufb	.Lrol16(%rip),%xmm12 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm4,%xmm3 | 
 | 	pslld	$12,%xmm3 | 
 | 	psrld	$20,%xmm4 | 
 | 	pxor	%xmm3,%xmm4 | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm0,%xmm12 | 
 | 	pshufb	.Lrol8(%rip),%xmm12 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm4,%xmm3 | 
 | 	pslld	$7,%xmm3 | 
 | 	psrld	$25,%xmm4 | 
 | 	pxor	%xmm3,%xmm4 | 
 | .byte	102,15,58,15,228,4 | 
 | .byte	102,69,15,58,15,192,8 | 
 | .byte	102,69,15,58,15,228,12 | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm0,%xmm12 | 
 | 	pshufb	.Lrol16(%rip),%xmm12 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm4,%xmm3 | 
 | 	pslld	$12,%xmm3 | 
 | 	psrld	$20,%xmm4 | 
 | 	pxor	%xmm3,%xmm4 | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm0,%xmm12 | 
 | 	pshufb	.Lrol8(%rip),%xmm12 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm4,%xmm3 | 
 | 	pslld	$7,%xmm3 | 
 | 	psrld	$25,%xmm4 | 
 | 	pxor	%xmm3,%xmm4 | 
 | .byte	102,15,58,15,228,12 | 
 | .byte	102,69,15,58,15,192,8 | 
 | .byte	102,69,15,58,15,228,4 | 
 | 	addq	0+0(%rdi),%r10 | 
 | 	adcq	8+0(%rdi),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 |  | 
 | 	leaq	16(%rdi),%rdi | 
 | 	decq	%rcx | 
 | 	jg	.Lseal_sse_tail_64_rounds_and_x2hash | 
 | 	decq	%r8 | 
 | 	jge	.Lseal_sse_tail_64_rounds_and_x1hash | 
 | 	paddd	.Lchacha20_consts(%rip),%xmm0 | 
 | 	paddd	0+48(%rbp),%xmm4 | 
 | 	paddd	0+64(%rbp),%xmm8 | 
 | 	paddd	0+96(%rbp),%xmm12 | 
 |  | 
 | 	jmp	.Lseal_sse_128_tail_xor | 
 |  | 
 | .Lseal_sse_tail_128: | 
 | 	movdqa	.Lchacha20_consts(%rip),%xmm0 | 
 | 	movdqa	0+48(%rbp),%xmm4 | 
 | 	movdqa	0+64(%rbp),%xmm8 | 
 | 	movdqa	%xmm0,%xmm1 | 
 | 	movdqa	%xmm4,%xmm5 | 
 | 	movdqa	%xmm8,%xmm9 | 
 | 	movdqa	0+96(%rbp),%xmm13 | 
 | 	paddd	.Lsse_inc(%rip),%xmm13 | 
 | 	movdqa	%xmm13,%xmm12 | 
 | 	paddd	.Lsse_inc(%rip),%xmm12 | 
 | 	movdqa	%xmm12,0+96(%rbp) | 
 | 	movdqa	%xmm13,0+112(%rbp) | 
 |  | 
 | .Lseal_sse_tail_128_rounds_and_x2hash: | 
 | 	addq	0+0(%rdi),%r10 | 
 | 	adcq	8+0(%rdi),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 |  | 
 | 	leaq	16(%rdi),%rdi | 
 | .Lseal_sse_tail_128_rounds_and_x1hash: | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm0,%xmm12 | 
 | 	pshufb	.Lrol16(%rip),%xmm12 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm4,%xmm3 | 
 | 	pslld	$12,%xmm3 | 
 | 	psrld	$20,%xmm4 | 
 | 	pxor	%xmm3,%xmm4 | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm0,%xmm12 | 
 | 	pshufb	.Lrol8(%rip),%xmm12 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm4,%xmm3 | 
 | 	pslld	$7,%xmm3 | 
 | 	psrld	$25,%xmm4 | 
 | 	pxor	%xmm3,%xmm4 | 
 | .byte	102,15,58,15,228,4 | 
 | .byte	102,69,15,58,15,192,8 | 
 | .byte	102,69,15,58,15,228,12 | 
 | 	paddd	%xmm5,%xmm1 | 
 | 	pxor	%xmm1,%xmm13 | 
 | 	pshufb	.Lrol16(%rip),%xmm13 | 
 | 	paddd	%xmm13,%xmm9 | 
 | 	pxor	%xmm9,%xmm5 | 
 | 	movdqa	%xmm5,%xmm3 | 
 | 	pslld	$12,%xmm3 | 
 | 	psrld	$20,%xmm5 | 
 | 	pxor	%xmm3,%xmm5 | 
 | 	paddd	%xmm5,%xmm1 | 
 | 	pxor	%xmm1,%xmm13 | 
 | 	pshufb	.Lrol8(%rip),%xmm13 | 
 | 	paddd	%xmm13,%xmm9 | 
 | 	pxor	%xmm9,%xmm5 | 
 | 	movdqa	%xmm5,%xmm3 | 
 | 	pslld	$7,%xmm3 | 
 | 	psrld	$25,%xmm5 | 
 | 	pxor	%xmm3,%xmm5 | 
 | .byte	102,15,58,15,237,4 | 
 | .byte	102,69,15,58,15,201,8 | 
 | .byte	102,69,15,58,15,237,12 | 
 | 	addq	0+0(%rdi),%r10 | 
 | 	adcq	8+0(%rdi),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm0,%xmm12 | 
 | 	pshufb	.Lrol16(%rip),%xmm12 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm4,%xmm3 | 
 | 	pslld	$12,%xmm3 | 
 | 	psrld	$20,%xmm4 | 
 | 	pxor	%xmm3,%xmm4 | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm0,%xmm12 | 
 | 	pshufb	.Lrol8(%rip),%xmm12 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm4,%xmm3 | 
 | 	pslld	$7,%xmm3 | 
 | 	psrld	$25,%xmm4 | 
 | 	pxor	%xmm3,%xmm4 | 
 | .byte	102,15,58,15,228,12 | 
 | .byte	102,69,15,58,15,192,8 | 
 | .byte	102,69,15,58,15,228,4 | 
 | 	paddd	%xmm5,%xmm1 | 
 | 	pxor	%xmm1,%xmm13 | 
 | 	pshufb	.Lrol16(%rip),%xmm13 | 
 | 	paddd	%xmm13,%xmm9 | 
 | 	pxor	%xmm9,%xmm5 | 
 | 	movdqa	%xmm5,%xmm3 | 
 | 	pslld	$12,%xmm3 | 
 | 	psrld	$20,%xmm5 | 
 | 	pxor	%xmm3,%xmm5 | 
 | 	paddd	%xmm5,%xmm1 | 
 | 	pxor	%xmm1,%xmm13 | 
 | 	pshufb	.Lrol8(%rip),%xmm13 | 
 | 	paddd	%xmm13,%xmm9 | 
 | 	pxor	%xmm9,%xmm5 | 
 | 	movdqa	%xmm5,%xmm3 | 
 | 	pslld	$7,%xmm3 | 
 | 	psrld	$25,%xmm5 | 
 | 	pxor	%xmm3,%xmm5 | 
 | .byte	102,15,58,15,237,12 | 
 | .byte	102,69,15,58,15,201,8 | 
 | .byte	102,69,15,58,15,237,4 | 
 |  | 
 | 	leaq	16(%rdi),%rdi | 
 | 	decq	%rcx | 
 | 	jg	.Lseal_sse_tail_128_rounds_and_x2hash | 
 | 	decq	%r8 | 
 | 	jge	.Lseal_sse_tail_128_rounds_and_x1hash | 
 | 	paddd	.Lchacha20_consts(%rip),%xmm1 | 
 | 	paddd	0+48(%rbp),%xmm5 | 
 | 	paddd	0+64(%rbp),%xmm9 | 
 | 	paddd	0+112(%rbp),%xmm13 | 
 | 	paddd	.Lchacha20_consts(%rip),%xmm0 | 
 | 	paddd	0+48(%rbp),%xmm4 | 
 | 	paddd	0+64(%rbp),%xmm8 | 
 | 	paddd	0+96(%rbp),%xmm12 | 
 | 	movdqu	0 + 0(%rsi),%xmm3 | 
 | 	movdqu	16 + 0(%rsi),%xmm7 | 
 | 	movdqu	32 + 0(%rsi),%xmm11 | 
 | 	movdqu	48 + 0(%rsi),%xmm15 | 
 | 	pxor	%xmm3,%xmm1 | 
 | 	pxor	%xmm7,%xmm5 | 
 | 	pxor	%xmm11,%xmm9 | 
 | 	pxor	%xmm13,%xmm15 | 
 | 	movdqu	%xmm1,0 + 0(%rdi) | 
 | 	movdqu	%xmm5,16 + 0(%rdi) | 
 | 	movdqu	%xmm9,32 + 0(%rdi) | 
 | 	movdqu	%xmm15,48 + 0(%rdi) | 
 |  | 
 | 	movq	$64,%rcx | 
 | 	subq	$64,%rbx | 
 | 	leaq	64(%rsi),%rsi | 
 | 	jmp	.Lseal_sse_128_tail_hash | 
 |  | 
 | .Lseal_sse_tail_192: | 
 | 	movdqa	.Lchacha20_consts(%rip),%xmm0 | 
 | 	movdqa	0+48(%rbp),%xmm4 | 
 | 	movdqa	0+64(%rbp),%xmm8 | 
 | 	movdqa	%xmm0,%xmm1 | 
 | 	movdqa	%xmm4,%xmm5 | 
 | 	movdqa	%xmm8,%xmm9 | 
 | 	movdqa	%xmm0,%xmm2 | 
 | 	movdqa	%xmm4,%xmm6 | 
 | 	movdqa	%xmm8,%xmm10 | 
 | 	movdqa	0+96(%rbp),%xmm14 | 
 | 	paddd	.Lsse_inc(%rip),%xmm14 | 
 | 	movdqa	%xmm14,%xmm13 | 
 | 	paddd	.Lsse_inc(%rip),%xmm13 | 
 | 	movdqa	%xmm13,%xmm12 | 
 | 	paddd	.Lsse_inc(%rip),%xmm12 | 
 | 	movdqa	%xmm12,0+96(%rbp) | 
 | 	movdqa	%xmm13,0+112(%rbp) | 
 | 	movdqa	%xmm14,0+128(%rbp) | 
 |  | 
 | .Lseal_sse_tail_192_rounds_and_x2hash: | 
 | 	addq	0+0(%rdi),%r10 | 
 | 	adcq	8+0(%rdi),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 |  | 
 | 	leaq	16(%rdi),%rdi | 
 | .Lseal_sse_tail_192_rounds_and_x1hash: | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm0,%xmm12 | 
 | 	pshufb	.Lrol16(%rip),%xmm12 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm4,%xmm3 | 
 | 	pslld	$12,%xmm3 | 
 | 	psrld	$20,%xmm4 | 
 | 	pxor	%xmm3,%xmm4 | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm0,%xmm12 | 
 | 	pshufb	.Lrol8(%rip),%xmm12 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm4,%xmm3 | 
 | 	pslld	$7,%xmm3 | 
 | 	psrld	$25,%xmm4 | 
 | 	pxor	%xmm3,%xmm4 | 
 | .byte	102,15,58,15,228,4 | 
 | .byte	102,69,15,58,15,192,8 | 
 | .byte	102,69,15,58,15,228,12 | 
 | 	paddd	%xmm5,%xmm1 | 
 | 	pxor	%xmm1,%xmm13 | 
 | 	pshufb	.Lrol16(%rip),%xmm13 | 
 | 	paddd	%xmm13,%xmm9 | 
 | 	pxor	%xmm9,%xmm5 | 
 | 	movdqa	%xmm5,%xmm3 | 
 | 	pslld	$12,%xmm3 | 
 | 	psrld	$20,%xmm5 | 
 | 	pxor	%xmm3,%xmm5 | 
 | 	paddd	%xmm5,%xmm1 | 
 | 	pxor	%xmm1,%xmm13 | 
 | 	pshufb	.Lrol8(%rip),%xmm13 | 
 | 	paddd	%xmm13,%xmm9 | 
 | 	pxor	%xmm9,%xmm5 | 
 | 	movdqa	%xmm5,%xmm3 | 
 | 	pslld	$7,%xmm3 | 
 | 	psrld	$25,%xmm5 | 
 | 	pxor	%xmm3,%xmm5 | 
 | .byte	102,15,58,15,237,4 | 
 | .byte	102,69,15,58,15,201,8 | 
 | .byte	102,69,15,58,15,237,12 | 
 | 	paddd	%xmm6,%xmm2 | 
 | 	pxor	%xmm2,%xmm14 | 
 | 	pshufb	.Lrol16(%rip),%xmm14 | 
 | 	paddd	%xmm14,%xmm10 | 
 | 	pxor	%xmm10,%xmm6 | 
 | 	movdqa	%xmm6,%xmm3 | 
 | 	pslld	$12,%xmm3 | 
 | 	psrld	$20,%xmm6 | 
 | 	pxor	%xmm3,%xmm6 | 
 | 	paddd	%xmm6,%xmm2 | 
 | 	pxor	%xmm2,%xmm14 | 
 | 	pshufb	.Lrol8(%rip),%xmm14 | 
 | 	paddd	%xmm14,%xmm10 | 
 | 	pxor	%xmm10,%xmm6 | 
 | 	movdqa	%xmm6,%xmm3 | 
 | 	pslld	$7,%xmm3 | 
 | 	psrld	$25,%xmm6 | 
 | 	pxor	%xmm3,%xmm6 | 
 | .byte	102,15,58,15,246,4 | 
 | .byte	102,69,15,58,15,210,8 | 
 | .byte	102,69,15,58,15,246,12 | 
 | 	addq	0+0(%rdi),%r10 | 
 | 	adcq	8+0(%rdi),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm0,%xmm12 | 
 | 	pshufb	.Lrol16(%rip),%xmm12 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm4,%xmm3 | 
 | 	pslld	$12,%xmm3 | 
 | 	psrld	$20,%xmm4 | 
 | 	pxor	%xmm3,%xmm4 | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm0,%xmm12 | 
 | 	pshufb	.Lrol8(%rip),%xmm12 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm4,%xmm3 | 
 | 	pslld	$7,%xmm3 | 
 | 	psrld	$25,%xmm4 | 
 | 	pxor	%xmm3,%xmm4 | 
 | .byte	102,15,58,15,228,12 | 
 | .byte	102,69,15,58,15,192,8 | 
 | .byte	102,69,15,58,15,228,4 | 
 | 	paddd	%xmm5,%xmm1 | 
 | 	pxor	%xmm1,%xmm13 | 
 | 	pshufb	.Lrol16(%rip),%xmm13 | 
 | 	paddd	%xmm13,%xmm9 | 
 | 	pxor	%xmm9,%xmm5 | 
 | 	movdqa	%xmm5,%xmm3 | 
 | 	pslld	$12,%xmm3 | 
 | 	psrld	$20,%xmm5 | 
 | 	pxor	%xmm3,%xmm5 | 
 | 	paddd	%xmm5,%xmm1 | 
 | 	pxor	%xmm1,%xmm13 | 
 | 	pshufb	.Lrol8(%rip),%xmm13 | 
 | 	paddd	%xmm13,%xmm9 | 
 | 	pxor	%xmm9,%xmm5 | 
 | 	movdqa	%xmm5,%xmm3 | 
 | 	pslld	$7,%xmm3 | 
 | 	psrld	$25,%xmm5 | 
 | 	pxor	%xmm3,%xmm5 | 
 | .byte	102,15,58,15,237,12 | 
 | .byte	102,69,15,58,15,201,8 | 
 | .byte	102,69,15,58,15,237,4 | 
 | 	paddd	%xmm6,%xmm2 | 
 | 	pxor	%xmm2,%xmm14 | 
 | 	pshufb	.Lrol16(%rip),%xmm14 | 
 | 	paddd	%xmm14,%xmm10 | 
 | 	pxor	%xmm10,%xmm6 | 
 | 	movdqa	%xmm6,%xmm3 | 
 | 	pslld	$12,%xmm3 | 
 | 	psrld	$20,%xmm6 | 
 | 	pxor	%xmm3,%xmm6 | 
 | 	paddd	%xmm6,%xmm2 | 
 | 	pxor	%xmm2,%xmm14 | 
 | 	pshufb	.Lrol8(%rip),%xmm14 | 
 | 	paddd	%xmm14,%xmm10 | 
 | 	pxor	%xmm10,%xmm6 | 
 | 	movdqa	%xmm6,%xmm3 | 
 | 	pslld	$7,%xmm3 | 
 | 	psrld	$25,%xmm6 | 
 | 	pxor	%xmm3,%xmm6 | 
 | .byte	102,15,58,15,246,12 | 
 | .byte	102,69,15,58,15,210,8 | 
 | .byte	102,69,15,58,15,246,4 | 
 |  | 
 | 	leaq	16(%rdi),%rdi | 
 | 	decq	%rcx | 
 | 	jg	.Lseal_sse_tail_192_rounds_and_x2hash | 
 | 	decq	%r8 | 
 | 	jge	.Lseal_sse_tail_192_rounds_and_x1hash | 
 | 	paddd	.Lchacha20_consts(%rip),%xmm2 | 
 | 	paddd	0+48(%rbp),%xmm6 | 
 | 	paddd	0+64(%rbp),%xmm10 | 
 | 	paddd	0+128(%rbp),%xmm14 | 
 | 	paddd	.Lchacha20_consts(%rip),%xmm1 | 
 | 	paddd	0+48(%rbp),%xmm5 | 
 | 	paddd	0+64(%rbp),%xmm9 | 
 | 	paddd	0+112(%rbp),%xmm13 | 
 | 	paddd	.Lchacha20_consts(%rip),%xmm0 | 
 | 	paddd	0+48(%rbp),%xmm4 | 
 | 	paddd	0+64(%rbp),%xmm8 | 
 | 	paddd	0+96(%rbp),%xmm12 | 
 | 	movdqu	0 + 0(%rsi),%xmm3 | 
 | 	movdqu	16 + 0(%rsi),%xmm7 | 
 | 	movdqu	32 + 0(%rsi),%xmm11 | 
 | 	movdqu	48 + 0(%rsi),%xmm15 | 
 | 	pxor	%xmm3,%xmm2 | 
 | 	pxor	%xmm7,%xmm6 | 
 | 	pxor	%xmm11,%xmm10 | 
 | 	pxor	%xmm14,%xmm15 | 
 | 	movdqu	%xmm2,0 + 0(%rdi) | 
 | 	movdqu	%xmm6,16 + 0(%rdi) | 
 | 	movdqu	%xmm10,32 + 0(%rdi) | 
 | 	movdqu	%xmm15,48 + 0(%rdi) | 
 | 	movdqu	0 + 64(%rsi),%xmm3 | 
 | 	movdqu	16 + 64(%rsi),%xmm7 | 
 | 	movdqu	32 + 64(%rsi),%xmm11 | 
 | 	movdqu	48 + 64(%rsi),%xmm15 | 
 | 	pxor	%xmm3,%xmm1 | 
 | 	pxor	%xmm7,%xmm5 | 
 | 	pxor	%xmm11,%xmm9 | 
 | 	pxor	%xmm13,%xmm15 | 
 | 	movdqu	%xmm1,0 + 64(%rdi) | 
 | 	movdqu	%xmm5,16 + 64(%rdi) | 
 | 	movdqu	%xmm9,32 + 64(%rdi) | 
 | 	movdqu	%xmm15,48 + 64(%rdi) | 
 |  | 
 | 	movq	$128,%rcx | 
 | 	subq	$128,%rbx | 
 | 	leaq	128(%rsi),%rsi | 
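// Hash whatever whole 16-byte blocks of already-written ciphertext remain
// (%rcx bytes), then move on to xoring out the leftover data.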
 |  | 
 | .Lseal_sse_128_tail_hash: | 
 | 	cmpq	$16,%rcx | 
 | 	jb	.Lseal_sse_128_tail_xor | 
 | 	addq	0+0(%rdi),%r10 | 
 | 	adcq	8+0(%rdi),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 |  | 
 | 	subq	$16,%rcx | 
 | 	leaq	16(%rdi),%rdi | 
 | 	jmp	.Lseal_sse_128_tail_hash | 
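// XOR out the remaining data 16 bytes at a time; the queued keystream is
// shifted down one register per iteration so %xmm0 always holds the next
// 16 bytes.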
 |  | 
 | .Lseal_sse_128_tail_xor: | 
 | 	cmpq	$16,%rbx | 
 | 	jb	.Lseal_sse_tail_16 | 
 | 	subq	$16,%rbx | 
 |  | 
 | 	movdqu	0(%rsi),%xmm3 | 
 | 	pxor	%xmm3,%xmm0 | 
 | 	movdqu	%xmm0,0(%rdi) | 
 |  | 
 | 	addq	0(%rdi),%r10 | 
 | 	adcq	8(%rdi),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	leaq	16(%rsi),%rsi | 
 | 	leaq	16(%rdi),%rdi | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 |  | 
 |  | 
 | 	movdqa	%xmm4,%xmm0 | 
 | 	movdqa	%xmm8,%xmm4 | 
 | 	movdqa	%xmm12,%xmm8 | 
 | 	movdqa	%xmm1,%xmm12 | 
 | 	movdqa	%xmm5,%xmm1 | 
 | 	movdqa	%xmm9,%xmm5 | 
 | 	movdqa	%xmm13,%xmm9 | 
 | 	jmp	.Lseal_sse_128_tail_xor | 
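// Final sub-16-byte chunk: gather the trailing plaintext bytes back to front
// into %xmm15, XOR with the keystream in %xmm0 and emit them, then (if the
// caller supplied extra_in via 48/56(%r9)) top the block up with extra_in
// bytes and mask it with .Land_masks before absorbing it into Poly1305.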
 |  | 
 | .Lseal_sse_tail_16: | 
 | 	testq	%rbx,%rbx | 
 | 	jz	.Lprocess_blocks_of_extra_in | 
 |  | 
 | 	movq	%rbx,%r8 | 
 | 	movq	%rbx,%rcx | 
 | 	leaq	-1(%rsi,%rbx,1),%rsi | 
 | 	pxor	%xmm15,%xmm15 | 
 | .Lseal_sse_tail_16_compose: | 
 | 	pslldq	$1,%xmm15 | 
 | 	pinsrb	$0,(%rsi),%xmm15 | 
 | 	leaq	-1(%rsi),%rsi | 
 | 	decq	%rcx | 
 | 	jne	.Lseal_sse_tail_16_compose | 
 |  | 
 |  | 
 | 	pxor	%xmm0,%xmm15 | 
 |  | 
 |  | 
 | 	movq	%rbx,%rcx | 
 | 	movdqu	%xmm15,%xmm0 | 
 | .Lseal_sse_tail_16_extract: | 
 | 	pextrb	$0,%xmm0,(%rdi) | 
 | 	psrldq	$1,%xmm0 | 
 | 	addq	$1,%rdi | 
 | 	subq	$1,%rcx | 
 | 	jnz	.Lseal_sse_tail_16_extract | 
 |  | 
 |  | 
 |  | 
 |  | 
 |  | 
 |  | 
 |  | 
 |  | 
 | 	movq	288 + 0 + 32(%rsp),%r9 | 
 | 	movq	56(%r9),%r14 | 
 | 	movq	48(%r9),%r13 | 
 | 	testq	%r14,%r14 | 
 | 	jz	.Lprocess_partial_block | 
 |  | 
 | 	movq	$16,%r15 | 
 | 	subq	%rbx,%r15 | 
 | 	cmpq	%r15,%r14 | 
 |  | 
 | 	jge	.Lload_extra_in | 
 | 	movq	%r14,%r15 | 
 |  | 
 | .Lload_extra_in: | 
 |  | 
 |  | 
 | 	leaq	-1(%r13,%r15,1),%rsi | 
 |  | 
 |  | 
 | 	addq	%r15,%r13 | 
 | 	subq	%r15,%r14 | 
 | 	movq	%r13,48(%r9) | 
 | 	movq	%r14,56(%r9) | 
 |  | 
 |  | 
 |  | 
 | 	addq	%r15,%r8 | 
 |  | 
 |  | 
 | 	pxor	%xmm11,%xmm11 | 
 | .Lload_extra_load_loop: | 
 | 	pslldq	$1,%xmm11 | 
 | 	pinsrb	$0,(%rsi),%xmm11 | 
 | 	leaq	-1(%rsi),%rsi | 
 | 	subq	$1,%r15 | 
 | 	jnz	.Lload_extra_load_loop | 
 |  | 
 |  | 
 |  | 
 |  | 
 | 	movq	%rbx,%r15 | 
 |  | 
 | .Lload_extra_shift_loop: | 
 | 	pslldq	$1,%xmm11 | 
 | 	subq	$1,%r15 | 
 | 	jnz	.Lload_extra_shift_loop | 
 |  | 
 |  | 
 |  | 
 |  | 
 | 	leaq	.Land_masks(%rip),%r15 | 
 | 	shlq	$4,%rbx | 
 | 	pand	-16(%r15,%rbx,1),%xmm15 | 
 |  | 
 |  | 
 | 	por	%xmm11,%xmm15 | 
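// .byte 102,77,15,126,253 below is movq %xmm15,%r13; with the pextrq that
// follows, the padded block lands in %r13:%r14 for the Poly1305 update.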
 |  | 
 |  | 
 |  | 
 | .byte	102,77,15,126,253 | 
 | 	pextrq	$1,%xmm15,%r14 | 
 | 	addq	%r13,%r10 | 
 | 	adcq	%r14,%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
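// Whole 16-byte blocks of extra_in that did not fit above are hashed here
// straight from the buffer at 48(%r9); they are absorbed into the tag but
// never written to the output by this path.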
 |  | 
 |  | 
 | .Lprocess_blocks_of_extra_in: | 
 |  | 
	movq	288 + 0 + 32(%rsp),%r9
 | 	movq	48(%r9),%rsi | 
 | 	movq	56(%r9),%r8 | 
 | 	movq	%r8,%rcx | 
 | 	shrq	$4,%r8 | 
 |  | 
 | .Lprocess_extra_hash_loop: | 
 | 	jz	process_extra_in_trailer | 
 | 	addq	0+0(%rsi),%r10 | 
 | 	adcq	8+0(%rsi),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 |  | 
 | 	leaq	16(%rsi),%rsi | 
 | 	subq	$1,%r8 | 
 | 	jmp	.Lprocess_extra_hash_loop | 
 | process_extra_in_trailer: | 
 | 	andq	$15,%rcx | 
 | 	movq	%rcx,%rbx | 
 | 	jz	.Ldo_length_block | 
 | 	leaq	-1(%rsi,%rcx,1),%rsi | 
 |  | 
 | .Lprocess_extra_in_trailer_load: | 
 | 	pslldq	$1,%xmm15 | 
 | 	pinsrb	$0,(%rsi),%xmm15 | 
 | 	leaq	-1(%rsi),%rsi | 
 | 	subq	$1,%rcx | 
 | 	jnz	.Lprocess_extra_in_trailer_load | 
 |  | 
 | .Lprocess_partial_block: | 
 |  | 
 | 	leaq	.Land_masks(%rip),%r15 | 
 | 	shlq	$4,%rbx | 
 | 	pand	-16(%r15,%rbx,1),%xmm15 | 
 | .byte	102,77,15,126,253 | 
 | 	pextrq	$1,%xmm15,%r14 | 
 | 	addq	%r13,%r10 | 
 | 	adcq	%r14,%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
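// Length block: absorb ad_len || msg_len (saved at 32(%rbp)/40(%rbp)) as the
// final Poly1305 block.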
 |  | 
 |  | 
 | .Ldo_length_block: | 
 | 	addq	0+0+32(%rbp),%r10 | 
 | 	adcq	8+0+32(%rbp),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
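// Final reduction: the three-limb constant subtracted below (-5, -1, 3) is
// 2^130-5; on borrow the original accumulator is kept, giving h mod 2^130-5.
// Adding s from 16(%rbp) modulo 2^128 then yields the tag.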
 |  | 
 |  | 
 | 	movq	%r10,%r13 | 
 | 	movq	%r11,%r14 | 
 | 	movq	%r12,%r15 | 
 | 	subq	$-5,%r10 | 
 | 	sbbq	$-1,%r11 | 
 | 	sbbq	$3,%r12 | 
 | 	cmovcq	%r13,%r10 | 
 | 	cmovcq	%r14,%r11 | 
 | 	cmovcq	%r15,%r12 | 
 |  | 
 | 	addq	0+0+16(%rbp),%r10 | 
 | 	adcq	8+0+16(%rbp),%r11 | 
 |  | 
 | .cfi_remember_state	 | 
 | 	addq	$288 + 0 + 32,%rsp | 
 | .cfi_adjust_cfa_offset	-(288 + 32) | 
 |  | 
 | 	popq	%r9 | 
 | .cfi_adjust_cfa_offset	-8 | 
 | .cfi_restore	%r9 | 
 | 	movq	%r10,(%r9) | 
 | 	movq	%r11,8(%r9) | 
 | 	popq	%r15 | 
 | .cfi_adjust_cfa_offset	-8 | 
 | .cfi_restore	%r15 | 
 | 	popq	%r14 | 
 | .cfi_adjust_cfa_offset	-8 | 
 | .cfi_restore	%r14 | 
 | 	popq	%r13 | 
 | .cfi_adjust_cfa_offset	-8 | 
 | .cfi_restore	%r13 | 
 | 	popq	%r12 | 
 | .cfi_adjust_cfa_offset	-8 | 
 | .cfi_restore	%r12 | 
 | 	popq	%rbx | 
 | .cfi_adjust_cfa_offset	-8 | 
 | .cfi_restore	%rbx | 
 | 	popq	%rbp | 
 | .cfi_adjust_cfa_offset	-8 | 
 | .cfi_restore	%rbp | 
 | 	ret | 
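// Inputs of at most 128 bytes: three ChaCha20 blocks suffice.  The block with
// the initial counter (the %xmm2 column) is clamped into the Poly1305 key; the
// other two supply the keystream, and control joins the common tail-xor path.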
 |  | 
 | .Lseal_sse_128: | 
 | .cfi_restore_state	 | 
 | 	movdqu	.Lchacha20_consts(%rip),%xmm0 | 
 | 	movdqa	%xmm0,%xmm1 | 
 | 	movdqa	%xmm0,%xmm2 | 
 | 	movdqu	0(%r9),%xmm4 | 
 | 	movdqa	%xmm4,%xmm5 | 
 | 	movdqa	%xmm4,%xmm6 | 
 | 	movdqu	16(%r9),%xmm8 | 
 | 	movdqa	%xmm8,%xmm9 | 
 | 	movdqa	%xmm8,%xmm10 | 
 | 	movdqu	32(%r9),%xmm14 | 
 | 	movdqa	%xmm14,%xmm12 | 
 | 	paddd	.Lsse_inc(%rip),%xmm12 | 
 | 	movdqa	%xmm12,%xmm13 | 
 | 	paddd	.Lsse_inc(%rip),%xmm13 | 
 | 	movdqa	%xmm4,%xmm7 | 
 | 	movdqa	%xmm8,%xmm11 | 
 | 	movdqa	%xmm12,%xmm15 | 
 | 	movq	$10,%r10 | 
 |  | 
 | .Lseal_sse_128_rounds: | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm0,%xmm12 | 
 | 	pshufb	.Lrol16(%rip),%xmm12 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm4,%xmm3 | 
 | 	pslld	$12,%xmm3 | 
 | 	psrld	$20,%xmm4 | 
 | 	pxor	%xmm3,%xmm4 | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm0,%xmm12 | 
 | 	pshufb	.Lrol8(%rip),%xmm12 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm4,%xmm3 | 
 | 	pslld	$7,%xmm3 | 
 | 	psrld	$25,%xmm4 | 
 | 	pxor	%xmm3,%xmm4 | 
 | .byte	102,15,58,15,228,4 | 
 | .byte	102,69,15,58,15,192,8 | 
 | .byte	102,69,15,58,15,228,12 | 
 | 	paddd	%xmm5,%xmm1 | 
 | 	pxor	%xmm1,%xmm13 | 
 | 	pshufb	.Lrol16(%rip),%xmm13 | 
 | 	paddd	%xmm13,%xmm9 | 
 | 	pxor	%xmm9,%xmm5 | 
 | 	movdqa	%xmm5,%xmm3 | 
 | 	pslld	$12,%xmm3 | 
 | 	psrld	$20,%xmm5 | 
 | 	pxor	%xmm3,%xmm5 | 
 | 	paddd	%xmm5,%xmm1 | 
 | 	pxor	%xmm1,%xmm13 | 
 | 	pshufb	.Lrol8(%rip),%xmm13 | 
 | 	paddd	%xmm13,%xmm9 | 
 | 	pxor	%xmm9,%xmm5 | 
 | 	movdqa	%xmm5,%xmm3 | 
 | 	pslld	$7,%xmm3 | 
 | 	psrld	$25,%xmm5 | 
 | 	pxor	%xmm3,%xmm5 | 
 | .byte	102,15,58,15,237,4 | 
 | .byte	102,69,15,58,15,201,8 | 
 | .byte	102,69,15,58,15,237,12 | 
 | 	paddd	%xmm6,%xmm2 | 
 | 	pxor	%xmm2,%xmm14 | 
 | 	pshufb	.Lrol16(%rip),%xmm14 | 
 | 	paddd	%xmm14,%xmm10 | 
 | 	pxor	%xmm10,%xmm6 | 
 | 	movdqa	%xmm6,%xmm3 | 
 | 	pslld	$12,%xmm3 | 
 | 	psrld	$20,%xmm6 | 
 | 	pxor	%xmm3,%xmm6 | 
 | 	paddd	%xmm6,%xmm2 | 
 | 	pxor	%xmm2,%xmm14 | 
 | 	pshufb	.Lrol8(%rip),%xmm14 | 
 | 	paddd	%xmm14,%xmm10 | 
 | 	pxor	%xmm10,%xmm6 | 
 | 	movdqa	%xmm6,%xmm3 | 
 | 	pslld	$7,%xmm3 | 
 | 	psrld	$25,%xmm6 | 
 | 	pxor	%xmm3,%xmm6 | 
 | .byte	102,15,58,15,246,4 | 
 | .byte	102,69,15,58,15,210,8 | 
 | .byte	102,69,15,58,15,246,12 | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm0,%xmm12 | 
 | 	pshufb	.Lrol16(%rip),%xmm12 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm4,%xmm3 | 
 | 	pslld	$12,%xmm3 | 
 | 	psrld	$20,%xmm4 | 
 | 	pxor	%xmm3,%xmm4 | 
 | 	paddd	%xmm4,%xmm0 | 
 | 	pxor	%xmm0,%xmm12 | 
 | 	pshufb	.Lrol8(%rip),%xmm12 | 
 | 	paddd	%xmm12,%xmm8 | 
 | 	pxor	%xmm8,%xmm4 | 
 | 	movdqa	%xmm4,%xmm3 | 
 | 	pslld	$7,%xmm3 | 
 | 	psrld	$25,%xmm4 | 
 | 	pxor	%xmm3,%xmm4 | 
 | .byte	102,15,58,15,228,12 | 
 | .byte	102,69,15,58,15,192,8 | 
 | .byte	102,69,15,58,15,228,4 | 
 | 	paddd	%xmm5,%xmm1 | 
 | 	pxor	%xmm1,%xmm13 | 
 | 	pshufb	.Lrol16(%rip),%xmm13 | 
 | 	paddd	%xmm13,%xmm9 | 
 | 	pxor	%xmm9,%xmm5 | 
 | 	movdqa	%xmm5,%xmm3 | 
 | 	pslld	$12,%xmm3 | 
 | 	psrld	$20,%xmm5 | 
 | 	pxor	%xmm3,%xmm5 | 
 | 	paddd	%xmm5,%xmm1 | 
 | 	pxor	%xmm1,%xmm13 | 
 | 	pshufb	.Lrol8(%rip),%xmm13 | 
 | 	paddd	%xmm13,%xmm9 | 
 | 	pxor	%xmm9,%xmm5 | 
 | 	movdqa	%xmm5,%xmm3 | 
 | 	pslld	$7,%xmm3 | 
 | 	psrld	$25,%xmm5 | 
 | 	pxor	%xmm3,%xmm5 | 
 | .byte	102,15,58,15,237,12 | 
 | .byte	102,69,15,58,15,201,8 | 
 | .byte	102,69,15,58,15,237,4 | 
 | 	paddd	%xmm6,%xmm2 | 
 | 	pxor	%xmm2,%xmm14 | 
 | 	pshufb	.Lrol16(%rip),%xmm14 | 
 | 	paddd	%xmm14,%xmm10 | 
 | 	pxor	%xmm10,%xmm6 | 
 | 	movdqa	%xmm6,%xmm3 | 
 | 	pslld	$12,%xmm3 | 
 | 	psrld	$20,%xmm6 | 
 | 	pxor	%xmm3,%xmm6 | 
 | 	paddd	%xmm6,%xmm2 | 
 | 	pxor	%xmm2,%xmm14 | 
 | 	pshufb	.Lrol8(%rip),%xmm14 | 
 | 	paddd	%xmm14,%xmm10 | 
 | 	pxor	%xmm10,%xmm6 | 
 | 	movdqa	%xmm6,%xmm3 | 
 | 	pslld	$7,%xmm3 | 
 | 	psrld	$25,%xmm6 | 
 | 	pxor	%xmm3,%xmm6 | 
 | .byte	102,15,58,15,246,12 | 
 | .byte	102,69,15,58,15,210,8 | 
 | .byte	102,69,15,58,15,246,4 | 
 |  | 
 | 	decq	%r10 | 
 | 	jnz	.Lseal_sse_128_rounds | 
 | 	paddd	.Lchacha20_consts(%rip),%xmm0 | 
 | 	paddd	.Lchacha20_consts(%rip),%xmm1 | 
 | 	paddd	.Lchacha20_consts(%rip),%xmm2 | 
 | 	paddd	%xmm7,%xmm4 | 
 | 	paddd	%xmm7,%xmm5 | 
 | 	paddd	%xmm7,%xmm6 | 
 | 	paddd	%xmm11,%xmm8 | 
 | 	paddd	%xmm11,%xmm9 | 
 | 	paddd	%xmm15,%xmm12 | 
 | 	paddd	.Lsse_inc(%rip),%xmm15 | 
 | 	paddd	%xmm15,%xmm13 | 
 |  | 
 | 	pand	.Lclamp(%rip),%xmm2 | 
 | 	movdqa	%xmm2,0+0(%rbp) | 
 | 	movdqa	%xmm6,0+16(%rbp) | 
 |  | 
 | 	movq	%r8,%r8 | 
 | 	call	poly_hash_ad_internal | 
 | 	jmp	.Lseal_sse_128_tail_xor | 
 | .size	chacha20_poly1305_seal_sse41, .-chacha20_poly1305_seal_sse41 | 
 | .cfi_endproc	 | 
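// AVX2 open path: each ymm register carries two ChaCha20 blocks, one per
// 128-bit lane, with per-lane counters set up from .Lavx2_init/.Lavx2_inc;
// the main loop interleaves mulx-based Poly1305 with the vector rounds.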
 |  | 
 |  | 
 | .globl	chacha20_poly1305_open_avx2 | 
 | .hidden chacha20_poly1305_open_avx2 | 
 | .type	chacha20_poly1305_open_avx2,@function | 
 | .align	64 | 
 | chacha20_poly1305_open_avx2: | 
 | .cfi_startproc	 | 
 | _CET_ENDBR | 
 | 	pushq	%rbp | 
 | .cfi_adjust_cfa_offset	8 | 
 | .cfi_offset	%rbp,-16 | 
 | 	pushq	%rbx | 
 | .cfi_adjust_cfa_offset	8 | 
 | .cfi_offset	%rbx,-24 | 
 | 	pushq	%r12 | 
 | .cfi_adjust_cfa_offset	8 | 
 | .cfi_offset	%r12,-32 | 
 | 	pushq	%r13 | 
 | .cfi_adjust_cfa_offset	8 | 
 | .cfi_offset	%r13,-40 | 
 | 	pushq	%r14 | 
 | .cfi_adjust_cfa_offset	8 | 
 | .cfi_offset	%r14,-48 | 
 | 	pushq	%r15 | 
 | .cfi_adjust_cfa_offset	8 | 
 | .cfi_offset	%r15,-56 | 
 |  | 
 |  | 
 | 	pushq	%r9 | 
 | .cfi_adjust_cfa_offset	8 | 
 | .cfi_offset	%r9,-64 | 
 | 	subq	$288 + 0 + 32,%rsp | 
 | .cfi_adjust_cfa_offset	288 + 32 | 
 |  | 
 | 	leaq	32(%rsp),%rbp | 
 | 	andq	$-32,%rbp | 
 |  | 
 | 	movq	%rdx,%rbx | 
 | 	movq	%r8,0+0+32(%rbp) | 
 | 	movq	%rbx,8+0+32(%rbp) | 
 |  | 
 | 	vzeroupper | 
 | 	vmovdqa	.Lchacha20_consts(%rip),%ymm0 | 
 | 	vbroadcasti128	0(%r9),%ymm4 | 
 | 	vbroadcasti128	16(%r9),%ymm8 | 
 | 	vbroadcasti128	32(%r9),%ymm12 | 
 | 	vpaddd	.Lavx2_init(%rip),%ymm12,%ymm12 | 
 | 	cmpq	$192,%rbx | 
 | 	jbe	.Lopen_avx2_192 | 
 | 	cmpq	$320,%rbx | 
 | 	jbe	.Lopen_avx2_320 | 
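// Larger inputs: run one two-block pass to derive the Poly1305 key, hash the
// AD, then hash and decrypt the first 64 bytes with the leftover keystream
// before entering the wide main loop.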
 |  | 
 | 	vmovdqa	%ymm4,0+64(%rbp) | 
 | 	vmovdqa	%ymm8,0+96(%rbp) | 
 | 	vmovdqa	%ymm12,0+160(%rbp) | 
 | 	movq	$10,%r10 | 
 | .Lopen_avx2_init_rounds: | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	.Lrol16(%rip),%ymm12,%ymm12 | 
 | 	vpaddd	%ymm12,%ymm8,%ymm8 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vpsrld	$20,%ymm4,%ymm3 | 
 | 	vpslld	$12,%ymm4,%ymm4 | 
 | 	vpxor	%ymm3,%ymm4,%ymm4 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	.Lrol8(%rip),%ymm12,%ymm12 | 
 | 	vpaddd	%ymm12,%ymm8,%ymm8 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vpslld	$7,%ymm4,%ymm3 | 
 | 	vpsrld	$25,%ymm4,%ymm4 | 
 | 	vpxor	%ymm3,%ymm4,%ymm4 | 
 | 	vpalignr	$12,%ymm12,%ymm12,%ymm12 | 
 | 	vpalignr	$8,%ymm8,%ymm8,%ymm8 | 
 | 	vpalignr	$4,%ymm4,%ymm4,%ymm4 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	.Lrol16(%rip),%ymm12,%ymm12 | 
 | 	vpaddd	%ymm12,%ymm8,%ymm8 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vpsrld	$20,%ymm4,%ymm3 | 
 | 	vpslld	$12,%ymm4,%ymm4 | 
 | 	vpxor	%ymm3,%ymm4,%ymm4 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	.Lrol8(%rip),%ymm12,%ymm12 | 
 | 	vpaddd	%ymm12,%ymm8,%ymm8 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vpslld	$7,%ymm4,%ymm3 | 
 | 	vpsrld	$25,%ymm4,%ymm4 | 
 | 	vpxor	%ymm3,%ymm4,%ymm4 | 
 | 	vpalignr	$4,%ymm12,%ymm12,%ymm12 | 
 | 	vpalignr	$8,%ymm8,%ymm8,%ymm8 | 
 | 	vpalignr	$12,%ymm4,%ymm4,%ymm4 | 
 |  | 
 | 	decq	%r10 | 
 | 	jne	.Lopen_avx2_init_rounds | 
 | 	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0 | 
 | 	vpaddd	0+64(%rbp),%ymm4,%ymm4 | 
 | 	vpaddd	0+96(%rbp),%ymm8,%ymm8 | 
 | 	vpaddd	0+160(%rbp),%ymm12,%ymm12 | 
 |  | 
 | 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3 | 
 |  | 
 | 	vpand	.Lclamp(%rip),%ymm3,%ymm3 | 
 | 	vmovdqa	%ymm3,0+0(%rbp) | 
 |  | 
 | 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0 | 
 | 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4 | 
 |  | 
 | 	movq	%r8,%r8 | 
 | 	call	poly_hash_ad_internal | 
 |  | 
 | 	xorq	%rcx,%rcx | 
 | .Lopen_avx2_init_hash: | 
 | 	addq	0+0(%rsi,%rcx,1),%r10 | 
 | 	adcq	8+0(%rsi,%rcx,1),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 |  | 
 | 	addq	$16,%rcx | 
 | 	cmpq	$64,%rcx | 
 | 	jne	.Lopen_avx2_init_hash | 
 |  | 
 | 	vpxor	0(%rsi),%ymm0,%ymm0 | 
 | 	vpxor	32(%rsi),%ymm4,%ymm4 | 
 |  | 
 | 	vmovdqu	%ymm0,0(%rdi) | 
 | 	vmovdqu	%ymm4,32(%rdi) | 
 | 	leaq	64(%rsi),%rsi | 
 | 	leaq	64(%rdi),%rdi | 
 | 	subq	$64,%rbx | 
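// AVX2 main loop: 512 bytes (eight blocks) of keystream per iteration, with
// the Poly1305 hashing of the input ciphertext interleaved into the rounds.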
 | .Lopen_avx2_main_loop: | 
 |  | 
 | 	cmpq	$512,%rbx | 
 | 	jb	.Lopen_avx2_main_loop_done | 
 | 	vmovdqa	.Lchacha20_consts(%rip),%ymm0 | 
 | 	vmovdqa	0+64(%rbp),%ymm4 | 
 | 	vmovdqa	0+96(%rbp),%ymm8 | 
 | 	vmovdqa	%ymm0,%ymm1 | 
 | 	vmovdqa	%ymm4,%ymm5 | 
 | 	vmovdqa	%ymm8,%ymm9 | 
 | 	vmovdqa	%ymm0,%ymm2 | 
 | 	vmovdqa	%ymm4,%ymm6 | 
 | 	vmovdqa	%ymm8,%ymm10 | 
 | 	vmovdqa	%ymm0,%ymm3 | 
 | 	vmovdqa	%ymm4,%ymm7 | 
 | 	vmovdqa	%ymm8,%ymm11 | 
 | 	vmovdqa	.Lavx2_inc(%rip),%ymm12 | 
 | 	vpaddd	0+160(%rbp),%ymm12,%ymm15 | 
 | 	vpaddd	%ymm15,%ymm12,%ymm14 | 
 | 	vpaddd	%ymm14,%ymm12,%ymm13 | 
 | 	vpaddd	%ymm13,%ymm12,%ymm12 | 
 | 	vmovdqa	%ymm15,0+256(%rbp) | 
 | 	vmovdqa	%ymm14,0+224(%rbp) | 
 | 	vmovdqa	%ymm13,0+192(%rbp) | 
 | 	vmovdqa	%ymm12,0+160(%rbp) | 
 |  | 
 | 	xorq	%rcx,%rcx | 
 | .Lopen_avx2_main_loop_rounds: | 
 | 	addq	0+0(%rsi,%rcx,1),%r10 | 
 | 	adcq	8+0(%rsi,%rcx,1),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	vmovdqa	%ymm8,0+128(%rbp) | 
 | 	vmovdqa	.Lrol16(%rip),%ymm8 | 
 | 	vpaddd	%ymm7,%ymm3,%ymm3 | 
 | 	vpaddd	%ymm6,%ymm2,%ymm2 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm3,%ymm15,%ymm15 | 
 | 	vpxor	%ymm2,%ymm14,%ymm14 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	movq	0+0+0(%rbp),%rdx | 
 | 	movq	%rdx,%r15 | 
 | 	mulxq	%r10,%r13,%r14 | 
 | 	mulxq	%r11,%rax,%rdx | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	vpshufb	%ymm8,%ymm15,%ymm15 | 
 | 	vpshufb	%ymm8,%ymm14,%ymm14 | 
 | 	vpshufb	%ymm8,%ymm13,%ymm13 | 
 | 	vpshufb	%ymm8,%ymm12,%ymm12 | 
 | 	vpaddd	%ymm15,%ymm11,%ymm11 | 
 | 	vpaddd	%ymm14,%ymm10,%ymm10 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpaddd	0+128(%rbp),%ymm12,%ymm8 | 
 | 	vpxor	%ymm11,%ymm7,%ymm7 | 
 | 	movq	8+0+0(%rbp),%rdx | 
 | 	mulxq	%r10,%r10,%rax | 
 | 	addq	%r10,%r14 | 
 | 	mulxq	%r11,%r11,%r9 | 
 | 	adcq	%r11,%r15 | 
 | 	adcq	$0,%r9 | 
 | 	imulq	%r12,%rdx | 
 | 	vpxor	%ymm10,%ymm6,%ymm6 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	%ymm8,0+128(%rbp) | 
 | 	vpsrld	$20,%ymm7,%ymm8 | 
 | 	vpslld	$32-20,%ymm7,%ymm7 | 
 | 	vpxor	%ymm8,%ymm7,%ymm7 | 
 | 	vpsrld	$20,%ymm6,%ymm8 | 
 | 	vpslld	$32-20,%ymm6,%ymm6 | 
 | 	vpxor	%ymm8,%ymm6,%ymm6 | 
 | 	vpsrld	$20,%ymm5,%ymm8 | 
 | 	vpslld	$32-20,%ymm5,%ymm5 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	vpxor	%ymm8,%ymm5,%ymm5 | 
 | 	vpsrld	$20,%ymm4,%ymm8 | 
 | 	vpslld	$32-20,%ymm4,%ymm4 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	.Lrol8(%rip),%ymm8 | 
 | 	vpaddd	%ymm7,%ymm3,%ymm3 | 
 | 	vpaddd	%ymm6,%ymm2,%ymm2 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm3,%ymm15,%ymm15 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 | 	vpxor	%ymm2,%ymm14,%ymm14 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	%ymm8,%ymm15,%ymm15 | 
 | 	vpshufb	%ymm8,%ymm14,%ymm14 | 
 | 	vpshufb	%ymm8,%ymm13,%ymm13 | 
 | 	vpshufb	%ymm8,%ymm12,%ymm12 | 
 | 	vpaddd	%ymm15,%ymm11,%ymm11 | 
 | 	vpaddd	%ymm14,%ymm10,%ymm10 | 
 | 	addq	0+16(%rsi,%rcx,1),%r10 | 
 | 	adcq	8+16(%rsi,%rcx,1),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpaddd	0+128(%rbp),%ymm12,%ymm8 | 
 | 	vpxor	%ymm11,%ymm7,%ymm7 | 
 | 	vpxor	%ymm10,%ymm6,%ymm6 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	%ymm8,0+128(%rbp) | 
 | 	vpsrld	$25,%ymm7,%ymm8 | 
 | 	movq	0+0+0(%rbp),%rdx | 
 | 	movq	%rdx,%r15 | 
 | 	mulxq	%r10,%r13,%r14 | 
 | 	mulxq	%r11,%rax,%rdx | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	vpslld	$32-25,%ymm7,%ymm7 | 
 | 	vpxor	%ymm8,%ymm7,%ymm7 | 
 | 	vpsrld	$25,%ymm6,%ymm8 | 
 | 	vpslld	$32-25,%ymm6,%ymm6 | 
 | 	vpxor	%ymm8,%ymm6,%ymm6 | 
 | 	vpsrld	$25,%ymm5,%ymm8 | 
 | 	vpslld	$32-25,%ymm5,%ymm5 | 
 | 	vpxor	%ymm8,%ymm5,%ymm5 | 
 | 	vpsrld	$25,%ymm4,%ymm8 | 
 | 	vpslld	$32-25,%ymm4,%ymm4 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	0+128(%rbp),%ymm8 | 
 | 	vpalignr	$4,%ymm7,%ymm7,%ymm7 | 
 | 	vpalignr	$8,%ymm11,%ymm11,%ymm11 | 
 | 	vpalignr	$12,%ymm15,%ymm15,%ymm15 | 
 | 	vpalignr	$4,%ymm6,%ymm6,%ymm6 | 
 | 	vpalignr	$8,%ymm10,%ymm10,%ymm10 | 
 | 	vpalignr	$12,%ymm14,%ymm14,%ymm14 | 
 | 	movq	8+0+0(%rbp),%rdx | 
 | 	mulxq	%r10,%r10,%rax | 
 | 	addq	%r10,%r14 | 
 | 	mulxq	%r11,%r11,%r9 | 
 | 	adcq	%r11,%r15 | 
 | 	adcq	$0,%r9 | 
 | 	imulq	%r12,%rdx | 
 | 	vpalignr	$4,%ymm5,%ymm5,%ymm5 | 
 | 	vpalignr	$8,%ymm9,%ymm9,%ymm9 | 
 | 	vpalignr	$12,%ymm13,%ymm13,%ymm13 | 
 | 	vpalignr	$4,%ymm4,%ymm4,%ymm4 | 
 | 	vpalignr	$8,%ymm8,%ymm8,%ymm8 | 
 | 	vpalignr	$12,%ymm12,%ymm12,%ymm12 | 
 | 	vmovdqa	%ymm8,0+128(%rbp) | 
 | 	vmovdqa	.Lrol16(%rip),%ymm8 | 
 | 	vpaddd	%ymm7,%ymm3,%ymm3 | 
 | 	vpaddd	%ymm6,%ymm2,%ymm2 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm3,%ymm15,%ymm15 | 
 | 	vpxor	%ymm2,%ymm14,%ymm14 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	%ymm8,%ymm15,%ymm15 | 
 | 	vpshufb	%ymm8,%ymm14,%ymm14 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	vpshufb	%ymm8,%ymm13,%ymm13 | 
 | 	vpshufb	%ymm8,%ymm12,%ymm12 | 
 | 	vpaddd	%ymm15,%ymm11,%ymm11 | 
 | 	vpaddd	%ymm14,%ymm10,%ymm10 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpaddd	0+128(%rbp),%ymm12,%ymm8 | 
 | 	vpxor	%ymm11,%ymm7,%ymm7 | 
 | 	vpxor	%ymm10,%ymm6,%ymm6 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	%ymm8,0+128(%rbp) | 
 | 	vpsrld	$20,%ymm7,%ymm8 | 
 | 	vpslld	$32-20,%ymm7,%ymm7 | 
 | 	vpxor	%ymm8,%ymm7,%ymm7 | 
 | 	vpsrld	$20,%ymm6,%ymm8 | 
 | 	vpslld	$32-20,%ymm6,%ymm6 | 
 | 	vpxor	%ymm8,%ymm6,%ymm6 | 
 | 	addq	0+32(%rsi,%rcx,1),%r10 | 
 | 	adcq	8+32(%rsi,%rcx,1),%r11 | 
 | 	adcq	$1,%r12 | 
 |  | 
 | 	leaq	48(%rcx),%rcx | 
 | 	vpsrld	$20,%ymm5,%ymm8 | 
 | 	vpslld	$32-20,%ymm5,%ymm5 | 
 | 	vpxor	%ymm8,%ymm5,%ymm5 | 
 | 	vpsrld	$20,%ymm4,%ymm8 | 
 | 	vpslld	$32-20,%ymm4,%ymm4 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	.Lrol8(%rip),%ymm8 | 
 | 	vpaddd	%ymm7,%ymm3,%ymm3 | 
 | 	vpaddd	%ymm6,%ymm2,%ymm2 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm3,%ymm15,%ymm15 | 
 | 	vpxor	%ymm2,%ymm14,%ymm14 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	%ymm8,%ymm15,%ymm15 | 
 | 	vpshufb	%ymm8,%ymm14,%ymm14 | 
 | 	vpshufb	%ymm8,%ymm13,%ymm13 | 
 | 	movq	0+0+0(%rbp),%rdx | 
 | 	movq	%rdx,%r15 | 
 | 	mulxq	%r10,%r13,%r14 | 
 | 	mulxq	%r11,%rax,%rdx | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	vpshufb	%ymm8,%ymm12,%ymm12 | 
 | 	vpaddd	%ymm15,%ymm11,%ymm11 | 
 | 	vpaddd	%ymm14,%ymm10,%ymm10 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpaddd	0+128(%rbp),%ymm12,%ymm8 | 
 | 	vpxor	%ymm11,%ymm7,%ymm7 | 
 | 	vpxor	%ymm10,%ymm6,%ymm6 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	movq	8+0+0(%rbp),%rdx | 
 | 	mulxq	%r10,%r10,%rax | 
 | 	addq	%r10,%r14 | 
 | 	mulxq	%r11,%r11,%r9 | 
 | 	adcq	%r11,%r15 | 
 | 	adcq	$0,%r9 | 
 | 	imulq	%r12,%rdx | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	%ymm8,0+128(%rbp) | 
 | 	vpsrld	$25,%ymm7,%ymm8 | 
 | 	vpslld	$32-25,%ymm7,%ymm7 | 
 | 	vpxor	%ymm8,%ymm7,%ymm7 | 
 | 	vpsrld	$25,%ymm6,%ymm8 | 
 | 	vpslld	$32-25,%ymm6,%ymm6 | 
 | 	vpxor	%ymm8,%ymm6,%ymm6 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	vpsrld	$25,%ymm5,%ymm8 | 
 | 	vpslld	$32-25,%ymm5,%ymm5 | 
 | 	vpxor	%ymm8,%ymm5,%ymm5 | 
 | 	vpsrld	$25,%ymm4,%ymm8 | 
 | 	vpslld	$32-25,%ymm4,%ymm4 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	0+128(%rbp),%ymm8 | 
 | 	vpalignr	$12,%ymm7,%ymm7,%ymm7 | 
 | 	vpalignr	$8,%ymm11,%ymm11,%ymm11 | 
 | 	vpalignr	$4,%ymm15,%ymm15,%ymm15 | 
 | 	vpalignr	$12,%ymm6,%ymm6,%ymm6 | 
 | 	vpalignr	$8,%ymm10,%ymm10,%ymm10 | 
 | 	vpalignr	$4,%ymm14,%ymm14,%ymm14 | 
 | 	vpalignr	$12,%ymm5,%ymm5,%ymm5 | 
 | 	vpalignr	$8,%ymm9,%ymm9,%ymm9 | 
 | 	vpalignr	$4,%ymm13,%ymm13,%ymm13 | 
 | 	vpalignr	$12,%ymm4,%ymm4,%ymm4 | 
 | 	vpalignr	$8,%ymm8,%ymm8,%ymm8 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 | 	vpalignr	$4,%ymm12,%ymm12,%ymm12 | 
 |  | 
 | 	cmpq	$60*8,%rcx | 
 | 	jne	.Lopen_avx2_main_loop_rounds | 
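// Round loop finished (it exits once %rcx reaches 60*8 = 480 hashed bytes; each pass
// appears to run one ChaCha20 double round over the four 2-block states while folding
// 48 bytes of ciphertext into the Poly1305 accumulator). Now add the saved initial
// state back into each group: the ChaCha20 feed-forward.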
 | 	vpaddd	.Lchacha20_consts(%rip),%ymm3,%ymm3 | 
 | 	vpaddd	0+64(%rbp),%ymm7,%ymm7 | 
 | 	vpaddd	0+96(%rbp),%ymm11,%ymm11 | 
 | 	vpaddd	0+256(%rbp),%ymm15,%ymm15 | 
 | 	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2 | 
 | 	vpaddd	0+64(%rbp),%ymm6,%ymm6 | 
 | 	vpaddd	0+96(%rbp),%ymm10,%ymm10 | 
 | 	vpaddd	0+224(%rbp),%ymm14,%ymm14 | 
 | 	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1 | 
 | 	vpaddd	0+64(%rbp),%ymm5,%ymm5 | 
 | 	vpaddd	0+96(%rbp),%ymm9,%ymm9 | 
 | 	vpaddd	0+192(%rbp),%ymm13,%ymm13 | 
 | 	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0 | 
 | 	vpaddd	0+64(%rbp),%ymm4,%ymm4 | 
 | 	vpaddd	0+96(%rbp),%ymm8,%ymm8 | 
 | 	vpaddd	0+160(%rbp),%ymm12,%ymm12 | 
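// Keystream for this 512-byte chunk is ready. Hash its last 32 ciphertext bytes
// (offsets 480 and 496, not covered by the round loop), re-interleave the 128-bit
// lanes with vperm2i128, and XOR the keystream against the input 128 bytes at a time.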
 |  | 
 | 	vmovdqa	%ymm0,0+128(%rbp) | 
 | 	addq	0+60*8(%rsi),%r10 | 
 | 	adcq	8+60*8(%rsi),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0 | 
 | 	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7 | 
 | 	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3 | 
 | 	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11 | 
 | 	vpxor	0+0(%rsi),%ymm0,%ymm0 | 
 | 	vpxor	32+0(%rsi),%ymm3,%ymm3 | 
 | 	vpxor	64+0(%rsi),%ymm7,%ymm7 | 
 | 	vpxor	96+0(%rsi),%ymm11,%ymm11 | 
 | 	vmovdqu	%ymm0,0+0(%rdi) | 
 | 	vmovdqu	%ymm3,32+0(%rdi) | 
 | 	vmovdqu	%ymm7,64+0(%rdi) | 
 | 	vmovdqu	%ymm11,96+0(%rdi) | 
 |  | 
 | 	vmovdqa	0+128(%rbp),%ymm0 | 
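// Poly1305 block: multiply the accumulator in r10:r11:r12 by the clamped key limbs
// r0/r1 kept at 0(%rbp)/8(%rbp), then reduce modulo 2^130 - 5 (keep the low 2 bits of
// the top limb and fold the rest back in as carry times 5). The same sequence, in
// mulq or mulx form, repeats throughout this file.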
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 | 	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3 | 
 | 	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6 | 
 | 	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2 | 
 | 	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10 | 
 | 	vpxor	0+128(%rsi),%ymm3,%ymm3 | 
 | 	vpxor	32+128(%rsi),%ymm2,%ymm2 | 
 | 	vpxor	64+128(%rsi),%ymm6,%ymm6 | 
 | 	vpxor	96+128(%rsi),%ymm10,%ymm10 | 
 | 	vmovdqu	%ymm3,0+128(%rdi) | 
 | 	vmovdqu	%ymm2,32+128(%rdi) | 
 | 	vmovdqu	%ymm6,64+128(%rdi) | 
 | 	vmovdqu	%ymm10,96+128(%rdi) | 
 | 	addq	0+60*8+16(%rsi),%r10 | 
 | 	adcq	8+60*8+16(%rsi),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3 | 
 | 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5 | 
 | 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1 | 
 | 	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9 | 
 | 	vpxor	0+256(%rsi),%ymm3,%ymm3 | 
 | 	vpxor	32+256(%rsi),%ymm1,%ymm1 | 
 | 	vpxor	64+256(%rsi),%ymm5,%ymm5 | 
 | 	vpxor	96+256(%rsi),%ymm9,%ymm9 | 
 | 	vmovdqu	%ymm3,0+256(%rdi) | 
 | 	vmovdqu	%ymm1,32+256(%rdi) | 
 | 	vmovdqu	%ymm5,64+256(%rdi) | 
 | 	vmovdqu	%ymm9,96+256(%rdi) | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 | 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3 | 
 | 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm4 | 
 | 	vperm2i128	$0x02,%ymm8,%ymm12,%ymm0 | 
 | 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm8 | 
 | 	vpxor	0+384(%rsi),%ymm3,%ymm3 | 
 | 	vpxor	32+384(%rsi),%ymm0,%ymm0 | 
 | 	vpxor	64+384(%rsi),%ymm4,%ymm4 | 
 | 	vpxor	96+384(%rsi),%ymm8,%ymm8 | 
 | 	vmovdqu	%ymm3,0+384(%rdi) | 
 | 	vmovdqu	%ymm0,32+384(%rdi) | 
 | 	vmovdqu	%ymm4,64+384(%rdi) | 
 | 	vmovdqu	%ymm8,96+384(%rdi) | 
 |  | 
 | 	leaq	512(%rsi),%rsi | 
 | 	leaq	512(%rdi),%rdi | 
 | 	subq	$512,%rbx | 
 | 	jmp	.Lopen_avx2_main_loop | 
 | .Lopen_avx2_main_loop_done: | 
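// Fewer than 512 bytes remain; pick a tail path by the number of leftover bytes,
// or finish directly if the length was a multiple of 512.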
 | 	testq	%rbx,%rbx | 
 | 	vzeroupper | 
 | 	je	.Lopen_sse_finalize | 
 |  | 
 | 	cmpq	$384,%rbx | 
 | 	ja	.Lopen_avx2_tail_512 | 
 | 	cmpq	$256,%rbx | 
 | 	ja	.Lopen_avx2_tail_384 | 
 | 	cmpq	$128,%rbx | 
 | 	ja	.Lopen_avx2_tail_256 | 
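// At most 128 bytes left: rebuild a single 2-block ChaCha20 state from the saved key
// rows and advance the stored counter. %rcx is the tail length rounded down to a
// multiple of 16; one 16-byte ciphertext block is hashed per double round until %r8
// reaches it.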
 | 	vmovdqa	.Lchacha20_consts(%rip),%ymm0 | 
 | 	vmovdqa	0+64(%rbp),%ymm4 | 
 | 	vmovdqa	0+96(%rbp),%ymm8 | 
 | 	vmovdqa	.Lavx2_inc(%rip),%ymm12 | 
 | 	vpaddd	0+160(%rbp),%ymm12,%ymm12 | 
 | 	vmovdqa	%ymm12,0+160(%rbp) | 
 |  | 
 | 	xorq	%r8,%r8 | 
 | 	movq	%rbx,%rcx | 
 | 	andq	$-16,%rcx | 
 | 	testq	%rcx,%rcx | 
 | 	je	.Lopen_avx2_tail_128_rounds | 
 | .Lopen_avx2_tail_128_rounds_and_x1hash: | 
 | 	addq	0+0(%rsi,%r8,1),%r10 | 
 | 	adcq	8+0(%rsi,%r8,1),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 |  | 
 | .Lopen_avx2_tail_128_rounds: | 
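// One ChaCha20 double round (column pass then diagonal pass) on the 2-block state.
// %r8 advances by 16 per iteration; the loop runs until 10 double rounds (160) are
// done, hashing a ciphertext block on each pass while whole blocks remain.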
 | 	addq	$16,%r8 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	.Lrol16(%rip),%ymm12,%ymm12 | 
 | 	vpaddd	%ymm12,%ymm8,%ymm8 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vpsrld	$20,%ymm4,%ymm3 | 
 | 	vpslld	$12,%ymm4,%ymm4 | 
 | 	vpxor	%ymm3,%ymm4,%ymm4 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	.Lrol8(%rip),%ymm12,%ymm12 | 
 | 	vpaddd	%ymm12,%ymm8,%ymm8 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vpslld	$7,%ymm4,%ymm3 | 
 | 	vpsrld	$25,%ymm4,%ymm4 | 
 | 	vpxor	%ymm3,%ymm4,%ymm4 | 
 | 	vpalignr	$12,%ymm12,%ymm12,%ymm12 | 
 | 	vpalignr	$8,%ymm8,%ymm8,%ymm8 | 
 | 	vpalignr	$4,%ymm4,%ymm4,%ymm4 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	.Lrol16(%rip),%ymm12,%ymm12 | 
 | 	vpaddd	%ymm12,%ymm8,%ymm8 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vpsrld	$20,%ymm4,%ymm3 | 
 | 	vpslld	$12,%ymm4,%ymm4 | 
 | 	vpxor	%ymm3,%ymm4,%ymm4 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	.Lrol8(%rip),%ymm12,%ymm12 | 
 | 	vpaddd	%ymm12,%ymm8,%ymm8 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vpslld	$7,%ymm4,%ymm3 | 
 | 	vpsrld	$25,%ymm4,%ymm4 | 
 | 	vpxor	%ymm3,%ymm4,%ymm4 | 
 | 	vpalignr	$4,%ymm12,%ymm12,%ymm12 | 
 | 	vpalignr	$8,%ymm8,%ymm8,%ymm8 | 
 | 	vpalignr	$12,%ymm4,%ymm4,%ymm4 | 
 |  | 
 | 	cmpq	%rcx,%r8 | 
 | 	jb	.Lopen_avx2_tail_128_rounds_and_x1hash | 
 | 	cmpq	$160,%r8 | 
 | 	jne	.Lopen_avx2_tail_128_rounds | 
 | 	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0 | 
 | 	vpaddd	0+64(%rbp),%ymm4,%ymm4 | 
 | 	vpaddd	0+96(%rbp),%ymm8,%ymm8 | 
 | 	vpaddd	0+160(%rbp),%ymm12,%ymm12 | 
 | 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3 | 
 | 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0 | 
 | 	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4 | 
 | 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12 | 
 | 	vmovdqa	%ymm3,%ymm8 | 
 |  | 
 | 	jmp	.Lopen_avx2_tail_128_xor | 
 |  | 
 | .Lopen_avx2_tail_256: | 
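// 129 to 256 bytes left: run two 2-block states. Poly1305 hashes roughly one 16-byte
// ciphertext block per double round (capped at 10) and catches up afterwards.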
 | 	vmovdqa	.Lchacha20_consts(%rip),%ymm0 | 
 | 	vmovdqa	0+64(%rbp),%ymm4 | 
 | 	vmovdqa	0+96(%rbp),%ymm8 | 
 | 	vmovdqa	%ymm0,%ymm1 | 
 | 	vmovdqa	%ymm4,%ymm5 | 
 | 	vmovdqa	%ymm8,%ymm9 | 
 | 	vmovdqa	.Lavx2_inc(%rip),%ymm12 | 
 | 	vpaddd	0+160(%rbp),%ymm12,%ymm13 | 
 | 	vpaddd	%ymm13,%ymm12,%ymm12 | 
 | 	vmovdqa	%ymm12,0+160(%rbp) | 
 | 	vmovdqa	%ymm13,0+192(%rbp) | 
 |  | 
 | 	movq	%rbx,0+128(%rbp) | 
 | 	movq	%rbx,%rcx | 
 | 	subq	$128,%rcx | 
 | 	shrq	$4,%rcx | 
 | 	movq	$10,%r8 | 
 | 	cmpq	$10,%rcx | 
 | 	cmovgq	%r8,%rcx | 
 | 	movq	%rsi,%rbx | 
 | 	xorq	%r8,%r8 | 
 | .Lopen_avx2_tail_256_rounds_and_x1hash: | 
 | 	addq	0+0(%rbx),%r10 | 
 | 	adcq	8+0(%rbx),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rdx | 
 | 	movq	%rdx,%r15 | 
 | 	mulxq	%r10,%r13,%r14 | 
 | 	mulxq	%r11,%rax,%rdx | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rdx | 
 | 	mulxq	%r10,%r10,%rax | 
 | 	addq	%r10,%r14 | 
 | 	mulxq	%r11,%r11,%r9 | 
 | 	adcq	%r11,%r15 | 
 | 	adcq	$0,%r9 | 
 | 	imulq	%r12,%rdx | 
 | 	addq	%rax,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 |  | 
 | 	leaq	16(%rbx),%rbx | 
 | .Lopen_avx2_tail_256_rounds: | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	.Lrol16(%rip),%ymm12,%ymm12 | 
 | 	vpaddd	%ymm12,%ymm8,%ymm8 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vpsrld	$20,%ymm4,%ymm3 | 
 | 	vpslld	$12,%ymm4,%ymm4 | 
 | 	vpxor	%ymm3,%ymm4,%ymm4 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	.Lrol8(%rip),%ymm12,%ymm12 | 
 | 	vpaddd	%ymm12,%ymm8,%ymm8 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vpslld	$7,%ymm4,%ymm3 | 
 | 	vpsrld	$25,%ymm4,%ymm4 | 
 | 	vpxor	%ymm3,%ymm4,%ymm4 | 
 | 	vpalignr	$12,%ymm12,%ymm12,%ymm12 | 
 | 	vpalignr	$8,%ymm8,%ymm8,%ymm8 | 
 | 	vpalignr	$4,%ymm4,%ymm4,%ymm4 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpshufb	.Lrol16(%rip),%ymm13,%ymm13 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpsrld	$20,%ymm5,%ymm3 | 
 | 	vpslld	$12,%ymm5,%ymm5 | 
 | 	vpxor	%ymm3,%ymm5,%ymm5 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpshufb	.Lrol8(%rip),%ymm13,%ymm13 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpslld	$7,%ymm5,%ymm3 | 
 | 	vpsrld	$25,%ymm5,%ymm5 | 
 | 	vpxor	%ymm3,%ymm5,%ymm5 | 
 | 	vpalignr	$12,%ymm13,%ymm13,%ymm13 | 
 | 	vpalignr	$8,%ymm9,%ymm9,%ymm9 | 
 | 	vpalignr	$4,%ymm5,%ymm5,%ymm5 | 
 |  | 
 | 	incq	%r8 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	.Lrol16(%rip),%ymm12,%ymm12 | 
 | 	vpaddd	%ymm12,%ymm8,%ymm8 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vpsrld	$20,%ymm4,%ymm3 | 
 | 	vpslld	$12,%ymm4,%ymm4 | 
 | 	vpxor	%ymm3,%ymm4,%ymm4 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	.Lrol8(%rip),%ymm12,%ymm12 | 
 | 	vpaddd	%ymm12,%ymm8,%ymm8 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vpslld	$7,%ymm4,%ymm3 | 
 | 	vpsrld	$25,%ymm4,%ymm4 | 
 | 	vpxor	%ymm3,%ymm4,%ymm4 | 
 | 	vpalignr	$4,%ymm12,%ymm12,%ymm12 | 
 | 	vpalignr	$8,%ymm8,%ymm8,%ymm8 | 
 | 	vpalignr	$12,%ymm4,%ymm4,%ymm4 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpshufb	.Lrol16(%rip),%ymm13,%ymm13 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpsrld	$20,%ymm5,%ymm3 | 
 | 	vpslld	$12,%ymm5,%ymm5 | 
 | 	vpxor	%ymm3,%ymm5,%ymm5 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpshufb	.Lrol8(%rip),%ymm13,%ymm13 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpslld	$7,%ymm5,%ymm3 | 
 | 	vpsrld	$25,%ymm5,%ymm5 | 
 | 	vpxor	%ymm3,%ymm5,%ymm5 | 
 | 	vpalignr	$4,%ymm13,%ymm13,%ymm13 | 
 | 	vpalignr	$8,%ymm9,%ymm9,%ymm9 | 
 | 	vpalignr	$12,%ymm5,%ymm5,%ymm5 | 
 | 	vpaddd	%ymm6,%ymm2,%ymm2 | 
 | 	vpxor	%ymm2,%ymm14,%ymm14 | 
 | 	vpshufb	.Lrol16(%rip),%ymm14,%ymm14 | 
 | 	vpaddd	%ymm14,%ymm10,%ymm10 | 
 | 	vpxor	%ymm10,%ymm6,%ymm6 | 
 | 	vpsrld	$20,%ymm6,%ymm3 | 
 | 	vpslld	$12,%ymm6,%ymm6 | 
 | 	vpxor	%ymm3,%ymm6,%ymm6 | 
 | 	vpaddd	%ymm6,%ymm2,%ymm2 | 
 | 	vpxor	%ymm2,%ymm14,%ymm14 | 
 | 	vpshufb	.Lrol8(%rip),%ymm14,%ymm14 | 
 | 	vpaddd	%ymm14,%ymm10,%ymm10 | 
 | 	vpxor	%ymm10,%ymm6,%ymm6 | 
 | 	vpslld	$7,%ymm6,%ymm3 | 
 | 	vpsrld	$25,%ymm6,%ymm6 | 
 | 	vpxor	%ymm3,%ymm6,%ymm6 | 
 | 	vpalignr	$4,%ymm14,%ymm14,%ymm14 | 
 | 	vpalignr	$8,%ymm10,%ymm10,%ymm10 | 
 | 	vpalignr	$12,%ymm6,%ymm6,%ymm6 | 
 |  | 
 | 	cmpq	%rcx,%r8 | 
 | 	jb	.Lopen_avx2_tail_256_rounds_and_x1hash | 
 | 	cmpq	$10,%r8 | 
 | 	jne	.Lopen_avx2_tail_256_rounds | 
 | 	movq	%rbx,%r8 | 
 | 	subq	%rsi,%rbx | 
 | 	movq	%rbx,%rcx | 
 | 	movq	0+128(%rbp),%rbx | 
 | .Lopen_avx2_tail_256_hash: | 
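// Catch-up loop: hash any whole 16-byte ciphertext blocks of this tail that were not
// covered while the rounds ran.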
 | 	addq	$16,%rcx | 
 | 	cmpq	%rbx,%rcx | 
 | 	jg	.Lopen_avx2_tail_256_done | 
 | 	addq	0+0(%r8),%r10 | 
 | 	adcq	8+0(%r8),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rdx | 
 | 	movq	%rdx,%r15 | 
 | 	mulxq	%r10,%r13,%r14 | 
 | 	mulxq	%r11,%rax,%rdx | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rdx | 
 | 	mulxq	%r10,%r10,%rax | 
 | 	addq	%r10,%r14 | 
 | 	mulxq	%r11,%r11,%r9 | 
 | 	adcq	%r11,%r15 | 
 | 	adcq	$0,%r9 | 
 | 	imulq	%r12,%rdx | 
 | 	addq	%rax,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 |  | 
 | 	leaq	16(%r8),%r8 | 
 | 	jmp	.Lopen_avx2_tail_256_hash | 
 | .Lopen_avx2_tail_256_done: | 
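// Feed-forward both states, XOR the first 128 bytes of the tail, and fall into the
// shared final XOR loop with the remaining keystream in ymm0/ymm4/ymm8/ymm12.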
 | 	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1 | 
 | 	vpaddd	0+64(%rbp),%ymm5,%ymm5 | 
 | 	vpaddd	0+96(%rbp),%ymm9,%ymm9 | 
 | 	vpaddd	0+192(%rbp),%ymm13,%ymm13 | 
 | 	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0 | 
 | 	vpaddd	0+64(%rbp),%ymm4,%ymm4 | 
 | 	vpaddd	0+96(%rbp),%ymm8,%ymm8 | 
 | 	vpaddd	0+160(%rbp),%ymm12,%ymm12 | 
 | 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3 | 
 | 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5 | 
 | 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1 | 
 | 	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9 | 
 | 	vpxor	0+0(%rsi),%ymm3,%ymm3 | 
 | 	vpxor	32+0(%rsi),%ymm1,%ymm1 | 
 | 	vpxor	64+0(%rsi),%ymm5,%ymm5 | 
 | 	vpxor	96+0(%rsi),%ymm9,%ymm9 | 
 | 	vmovdqu	%ymm3,0+0(%rdi) | 
 | 	vmovdqu	%ymm1,32+0(%rdi) | 
 | 	vmovdqu	%ymm5,64+0(%rdi) | 
 | 	vmovdqu	%ymm9,96+0(%rdi) | 
 | 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3 | 
 | 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0 | 
 | 	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4 | 
 | 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12 | 
 | 	vmovdqa	%ymm3,%ymm8 | 
 |  | 
 | 	leaq	128(%rsi),%rsi | 
 | 	leaq	128(%rdi),%rdi | 
 | 	subq	$128,%rbx | 
 | 	jmp	.Lopen_avx2_tail_128_xor | 
 |  | 
 | .Lopen_avx2_tail_384: | 
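// 257 to 384 bytes left: three 2-block states; up to two 16-byte ciphertext blocks are
// hashed per double round so the Poly1305 accumulator stays roughly in step.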
 | 	vmovdqa	.Lchacha20_consts(%rip),%ymm0 | 
 | 	vmovdqa	0+64(%rbp),%ymm4 | 
 | 	vmovdqa	0+96(%rbp),%ymm8 | 
 | 	vmovdqa	%ymm0,%ymm1 | 
 | 	vmovdqa	%ymm4,%ymm5 | 
 | 	vmovdqa	%ymm8,%ymm9 | 
 | 	vmovdqa	%ymm0,%ymm2 | 
 | 	vmovdqa	%ymm4,%ymm6 | 
 | 	vmovdqa	%ymm8,%ymm10 | 
 | 	vmovdqa	.Lavx2_inc(%rip),%ymm12 | 
 | 	vpaddd	0+160(%rbp),%ymm12,%ymm14 | 
 | 	vpaddd	%ymm14,%ymm12,%ymm13 | 
 | 	vpaddd	%ymm13,%ymm12,%ymm12 | 
 | 	vmovdqa	%ymm12,0+160(%rbp) | 
 | 	vmovdqa	%ymm13,0+192(%rbp) | 
 | 	vmovdqa	%ymm14,0+224(%rbp) | 
 |  | 
 | 	movq	%rbx,0+128(%rbp) | 
 | 	movq	%rbx,%rcx | 
 | 	subq	$256,%rcx | 
 | 	shrq	$4,%rcx | 
 | 	addq	$6,%rcx | 
 | 	movq	$10,%r8 | 
 | 	cmpq	$10,%rcx | 
 | 	cmovgq	%r8,%rcx | 
 | 	movq	%rsi,%rbx | 
 | 	xorq	%r8,%r8 | 
 | .Lopen_avx2_tail_384_rounds_and_x2hash: | 
 | 	addq	0+0(%rbx),%r10 | 
 | 	adcq	8+0(%rbx),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rdx | 
 | 	movq	%rdx,%r15 | 
 | 	mulxq	%r10,%r13,%r14 | 
 | 	mulxq	%r11,%rax,%rdx | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rdx | 
 | 	mulxq	%r10,%r10,%rax | 
 | 	addq	%r10,%r14 | 
 | 	mulxq	%r11,%r11,%r9 | 
 | 	adcq	%r11,%r15 | 
 | 	adcq	$0,%r9 | 
 | 	imulq	%r12,%rdx | 
 | 	addq	%rax,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 |  | 
 | 	leaq	16(%rbx),%rbx | 
 | .Lopen_avx2_tail_384_rounds_and_x1hash: | 
 | 	vpaddd	%ymm6,%ymm2,%ymm2 | 
 | 	vpxor	%ymm2,%ymm14,%ymm14 | 
 | 	vpshufb	.Lrol16(%rip),%ymm14,%ymm14 | 
 | 	vpaddd	%ymm14,%ymm10,%ymm10 | 
 | 	vpxor	%ymm10,%ymm6,%ymm6 | 
 | 	vpsrld	$20,%ymm6,%ymm3 | 
 | 	vpslld	$12,%ymm6,%ymm6 | 
 | 	vpxor	%ymm3,%ymm6,%ymm6 | 
 | 	vpaddd	%ymm6,%ymm2,%ymm2 | 
 | 	vpxor	%ymm2,%ymm14,%ymm14 | 
 | 	vpshufb	.Lrol8(%rip),%ymm14,%ymm14 | 
 | 	vpaddd	%ymm14,%ymm10,%ymm10 | 
 | 	vpxor	%ymm10,%ymm6,%ymm6 | 
 | 	vpslld	$7,%ymm6,%ymm3 | 
 | 	vpsrld	$25,%ymm6,%ymm6 | 
 | 	vpxor	%ymm3,%ymm6,%ymm6 | 
 | 	vpalignr	$12,%ymm14,%ymm14,%ymm14 | 
 | 	vpalignr	$8,%ymm10,%ymm10,%ymm10 | 
 | 	vpalignr	$4,%ymm6,%ymm6,%ymm6 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpshufb	.Lrol16(%rip),%ymm13,%ymm13 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpsrld	$20,%ymm5,%ymm3 | 
 | 	vpslld	$12,%ymm5,%ymm5 | 
 | 	vpxor	%ymm3,%ymm5,%ymm5 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpshufb	.Lrol8(%rip),%ymm13,%ymm13 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpslld	$7,%ymm5,%ymm3 | 
 | 	vpsrld	$25,%ymm5,%ymm5 | 
 | 	vpxor	%ymm3,%ymm5,%ymm5 | 
 | 	vpalignr	$12,%ymm13,%ymm13,%ymm13 | 
 | 	vpalignr	$8,%ymm9,%ymm9,%ymm9 | 
 | 	vpalignr	$4,%ymm5,%ymm5,%ymm5 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	.Lrol16(%rip),%ymm12,%ymm12 | 
 | 	vpaddd	%ymm12,%ymm8,%ymm8 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vpsrld	$20,%ymm4,%ymm3 | 
 | 	vpslld	$12,%ymm4,%ymm4 | 
 | 	vpxor	%ymm3,%ymm4,%ymm4 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	.Lrol8(%rip),%ymm12,%ymm12 | 
 | 	vpaddd	%ymm12,%ymm8,%ymm8 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vpslld	$7,%ymm4,%ymm3 | 
 | 	vpsrld	$25,%ymm4,%ymm4 | 
 | 	vpxor	%ymm3,%ymm4,%ymm4 | 
 | 	vpalignr	$12,%ymm12,%ymm12,%ymm12 | 
 | 	vpalignr	$8,%ymm8,%ymm8,%ymm8 | 
 | 	vpalignr	$4,%ymm4,%ymm4,%ymm4 | 
 | 	addq	0+0(%rbx),%r10 | 
 | 	adcq	8+0(%rbx),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 |  | 
 | 	leaq	16(%rbx),%rbx | 
 | 	incq	%r8 | 
 | 	vpaddd	%ymm6,%ymm2,%ymm2 | 
 | 	vpxor	%ymm2,%ymm14,%ymm14 | 
 | 	vpshufb	.Lrol16(%rip),%ymm14,%ymm14 | 
 | 	vpaddd	%ymm14,%ymm10,%ymm10 | 
 | 	vpxor	%ymm10,%ymm6,%ymm6 | 
 | 	vpsrld	$20,%ymm6,%ymm3 | 
 | 	vpslld	$12,%ymm6,%ymm6 | 
 | 	vpxor	%ymm3,%ymm6,%ymm6 | 
 | 	vpaddd	%ymm6,%ymm2,%ymm2 | 
 | 	vpxor	%ymm2,%ymm14,%ymm14 | 
 | 	vpshufb	.Lrol8(%rip),%ymm14,%ymm14 | 
 | 	vpaddd	%ymm14,%ymm10,%ymm10 | 
 | 	vpxor	%ymm10,%ymm6,%ymm6 | 
 | 	vpslld	$7,%ymm6,%ymm3 | 
 | 	vpsrld	$25,%ymm6,%ymm6 | 
 | 	vpxor	%ymm3,%ymm6,%ymm6 | 
 | 	vpalignr	$4,%ymm14,%ymm14,%ymm14 | 
 | 	vpalignr	$8,%ymm10,%ymm10,%ymm10 | 
 | 	vpalignr	$12,%ymm6,%ymm6,%ymm6 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpshufb	.Lrol16(%rip),%ymm13,%ymm13 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpsrld	$20,%ymm5,%ymm3 | 
 | 	vpslld	$12,%ymm5,%ymm5 | 
 | 	vpxor	%ymm3,%ymm5,%ymm5 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpshufb	.Lrol8(%rip),%ymm13,%ymm13 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpslld	$7,%ymm5,%ymm3 | 
 | 	vpsrld	$25,%ymm5,%ymm5 | 
 | 	vpxor	%ymm3,%ymm5,%ymm5 | 
 | 	vpalignr	$4,%ymm13,%ymm13,%ymm13 | 
 | 	vpalignr	$8,%ymm9,%ymm9,%ymm9 | 
 | 	vpalignr	$12,%ymm5,%ymm5,%ymm5 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	.Lrol16(%rip),%ymm12,%ymm12 | 
 | 	vpaddd	%ymm12,%ymm8,%ymm8 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vpsrld	$20,%ymm4,%ymm3 | 
 | 	vpslld	$12,%ymm4,%ymm4 | 
 | 	vpxor	%ymm3,%ymm4,%ymm4 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	.Lrol8(%rip),%ymm12,%ymm12 | 
 | 	vpaddd	%ymm12,%ymm8,%ymm8 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vpslld	$7,%ymm4,%ymm3 | 
 | 	vpsrld	$25,%ymm4,%ymm4 | 
 | 	vpxor	%ymm3,%ymm4,%ymm4 | 
 | 	vpalignr	$4,%ymm12,%ymm12,%ymm12 | 
 | 	vpalignr	$8,%ymm8,%ymm8,%ymm8 | 
 | 	vpalignr	$12,%ymm4,%ymm4,%ymm4 | 
 |  | 
 | 	cmpq	%rcx,%r8 | 
 | 	jb	.Lopen_avx2_tail_384_rounds_and_x2hash | 
 | 	cmpq	$10,%r8 | 
 | 	jne	.Lopen_avx2_tail_384_rounds_and_x1hash | 
 | 	movq	%rbx,%r8 | 
 | 	subq	%rsi,%rbx | 
 | 	movq	%rbx,%rcx | 
 | 	movq	0+128(%rbp),%rbx | 
 | .Lopen_avx2_384_tail_hash: | 
 | 	addq	$16,%rcx | 
 | 	cmpq	%rbx,%rcx | 
 | 	jg	.Lopen_avx2_384_tail_done | 
 | 	addq	0+0(%r8),%r10 | 
 | 	adcq	8+0(%r8),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rdx | 
 | 	movq	%rdx,%r15 | 
 | 	mulxq	%r10,%r13,%r14 | 
 | 	mulxq	%r11,%rax,%rdx | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rdx | 
 | 	mulxq	%r10,%r10,%rax | 
 | 	addq	%r10,%r14 | 
 | 	mulxq	%r11,%r11,%r9 | 
 | 	adcq	%r11,%r15 | 
 | 	adcq	$0,%r9 | 
 | 	imulq	%r12,%rdx | 
 | 	addq	%rax,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 |  | 
 | 	leaq	16(%r8),%r8 | 
 | 	jmp	.Lopen_avx2_384_tail_hash | 
 | .Lopen_avx2_384_tail_done: | 
 | 	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2 | 
 | 	vpaddd	0+64(%rbp),%ymm6,%ymm6 | 
 | 	vpaddd	0+96(%rbp),%ymm10,%ymm10 | 
 | 	vpaddd	0+224(%rbp),%ymm14,%ymm14 | 
 | 	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1 | 
 | 	vpaddd	0+64(%rbp),%ymm5,%ymm5 | 
 | 	vpaddd	0+96(%rbp),%ymm9,%ymm9 | 
 | 	vpaddd	0+192(%rbp),%ymm13,%ymm13 | 
 | 	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0 | 
 | 	vpaddd	0+64(%rbp),%ymm4,%ymm4 | 
 | 	vpaddd	0+96(%rbp),%ymm8,%ymm8 | 
 | 	vpaddd	0+160(%rbp),%ymm12,%ymm12 | 
 | 	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3 | 
 | 	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6 | 
 | 	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2 | 
 | 	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10 | 
 | 	vpxor	0+0(%rsi),%ymm3,%ymm3 | 
 | 	vpxor	32+0(%rsi),%ymm2,%ymm2 | 
 | 	vpxor	64+0(%rsi),%ymm6,%ymm6 | 
 | 	vpxor	96+0(%rsi),%ymm10,%ymm10 | 
 | 	vmovdqu	%ymm3,0+0(%rdi) | 
 | 	vmovdqu	%ymm2,32+0(%rdi) | 
 | 	vmovdqu	%ymm6,64+0(%rdi) | 
 | 	vmovdqu	%ymm10,96+0(%rdi) | 
 | 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3 | 
 | 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5 | 
 | 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1 | 
 | 	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9 | 
 | 	vpxor	0+128(%rsi),%ymm3,%ymm3 | 
 | 	vpxor	32+128(%rsi),%ymm1,%ymm1 | 
 | 	vpxor	64+128(%rsi),%ymm5,%ymm5 | 
 | 	vpxor	96+128(%rsi),%ymm9,%ymm9 | 
 | 	vmovdqu	%ymm3,0+128(%rdi) | 
 | 	vmovdqu	%ymm1,32+128(%rdi) | 
 | 	vmovdqu	%ymm5,64+128(%rdi) | 
 | 	vmovdqu	%ymm9,96+128(%rdi) | 
 | 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3 | 
 | 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0 | 
 | 	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4 | 
 | 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12 | 
 | 	vmovdqa	%ymm3,%ymm8 | 
 |  | 
 | 	leaq	256(%rsi),%rsi | 
 | 	leaq	256(%rdi),%rdi | 
 | 	subq	$256,%rbx | 
 | 	jmp	.Lopen_avx2_tail_128_xor | 
 |  | 
 | .Lopen_avx2_tail_512: | 
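// 385 to 512 bytes left: full four-state setup, mirroring the main loop. Hashing of
// the remaining ciphertext is interleaved with the 10 double rounds and finished in
// the catch-up loop below.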
 | 	vmovdqa	.Lchacha20_consts(%rip),%ymm0 | 
 | 	vmovdqa	0+64(%rbp),%ymm4 | 
 | 	vmovdqa	0+96(%rbp),%ymm8 | 
 | 	vmovdqa	%ymm0,%ymm1 | 
 | 	vmovdqa	%ymm4,%ymm5 | 
 | 	vmovdqa	%ymm8,%ymm9 | 
 | 	vmovdqa	%ymm0,%ymm2 | 
 | 	vmovdqa	%ymm4,%ymm6 | 
 | 	vmovdqa	%ymm8,%ymm10 | 
 | 	vmovdqa	%ymm0,%ymm3 | 
 | 	vmovdqa	%ymm4,%ymm7 | 
 | 	vmovdqa	%ymm8,%ymm11 | 
 | 	vmovdqa	.Lavx2_inc(%rip),%ymm12 | 
 | 	vpaddd	0+160(%rbp),%ymm12,%ymm15 | 
 | 	vpaddd	%ymm15,%ymm12,%ymm14 | 
 | 	vpaddd	%ymm14,%ymm12,%ymm13 | 
 | 	vpaddd	%ymm13,%ymm12,%ymm12 | 
 | 	vmovdqa	%ymm15,0+256(%rbp) | 
 | 	vmovdqa	%ymm14,0+224(%rbp) | 
 | 	vmovdqa	%ymm13,0+192(%rbp) | 
 | 	vmovdqa	%ymm12,0+160(%rbp) | 
 |  | 
 | 	xorq	%rcx,%rcx | 
 | 	movq	%rsi,%r8 | 
 | .Lopen_avx2_tail_512_rounds_and_x2hash: | 
 | 	addq	0+0(%r8),%r10 | 
 | 	adcq	8+0(%r8),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 |  | 
 | 	leaq	16(%r8),%r8 | 
 | .Lopen_avx2_tail_512_rounds_and_x1hash: | 
 | 	vmovdqa	%ymm8,0+128(%rbp) | 
 | 	vmovdqa	.Lrol16(%rip),%ymm8 | 
 | 	vpaddd	%ymm7,%ymm3,%ymm3 | 
 | 	vpaddd	%ymm6,%ymm2,%ymm2 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm3,%ymm15,%ymm15 | 
 | 	vpxor	%ymm2,%ymm14,%ymm14 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	%ymm8,%ymm15,%ymm15 | 
 | 	vpshufb	%ymm8,%ymm14,%ymm14 | 
 | 	vpshufb	%ymm8,%ymm13,%ymm13 | 
 | 	vpshufb	%ymm8,%ymm12,%ymm12 | 
 | 	vpaddd	%ymm15,%ymm11,%ymm11 | 
 | 	vpaddd	%ymm14,%ymm10,%ymm10 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpaddd	0+128(%rbp),%ymm12,%ymm8 | 
 | 	vpxor	%ymm11,%ymm7,%ymm7 | 
 | 	vpxor	%ymm10,%ymm6,%ymm6 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	%ymm8,0+128(%rbp) | 
 | 	vpsrld	$20,%ymm7,%ymm8 | 
 | 	vpslld	$32-20,%ymm7,%ymm7 | 
 | 	vpxor	%ymm8,%ymm7,%ymm7 | 
 | 	vpsrld	$20,%ymm6,%ymm8 | 
 | 	vpslld	$32-20,%ymm6,%ymm6 | 
 | 	vpxor	%ymm8,%ymm6,%ymm6 | 
 | 	vpsrld	$20,%ymm5,%ymm8 | 
 | 	vpslld	$32-20,%ymm5,%ymm5 | 
 | 	vpxor	%ymm8,%ymm5,%ymm5 | 
 | 	vpsrld	$20,%ymm4,%ymm8 | 
 | 	vpslld	$32-20,%ymm4,%ymm4 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	.Lrol8(%rip),%ymm8 | 
 | 	vpaddd	%ymm7,%ymm3,%ymm3 | 
 | 	addq	0+0(%r8),%r10 | 
 | 	adcq	8+0(%r8),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rdx | 
 | 	movq	%rdx,%r15 | 
 | 	mulxq	%r10,%r13,%r14 | 
 | 	mulxq	%r11,%rax,%rdx | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rdx | 
 | 	mulxq	%r10,%r10,%rax | 
 | 	addq	%r10,%r14 | 
 | 	mulxq	%r11,%r11,%r9 | 
 | 	adcq	%r11,%r15 | 
 | 	adcq	$0,%r9 | 
 | 	imulq	%r12,%rdx | 
 | 	addq	%rax,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 | 	vpaddd	%ymm6,%ymm2,%ymm2 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm3,%ymm15,%ymm15 | 
 | 	vpxor	%ymm2,%ymm14,%ymm14 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	%ymm8,%ymm15,%ymm15 | 
 | 	vpshufb	%ymm8,%ymm14,%ymm14 | 
 | 	vpshufb	%ymm8,%ymm13,%ymm13 | 
 | 	vpshufb	%ymm8,%ymm12,%ymm12 | 
 | 	vpaddd	%ymm15,%ymm11,%ymm11 | 
 | 	vpaddd	%ymm14,%ymm10,%ymm10 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpaddd	0+128(%rbp),%ymm12,%ymm8 | 
 | 	vpxor	%ymm11,%ymm7,%ymm7 | 
 | 	vpxor	%ymm10,%ymm6,%ymm6 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	%ymm8,0+128(%rbp) | 
 | 	vpsrld	$25,%ymm7,%ymm8 | 
 | 	vpslld	$32-25,%ymm7,%ymm7 | 
 | 	vpxor	%ymm8,%ymm7,%ymm7 | 
 | 	vpsrld	$25,%ymm6,%ymm8 | 
 | 	vpslld	$32-25,%ymm6,%ymm6 | 
 | 	vpxor	%ymm8,%ymm6,%ymm6 | 
 | 	vpsrld	$25,%ymm5,%ymm8 | 
 | 	vpslld	$32-25,%ymm5,%ymm5 | 
 | 	vpxor	%ymm8,%ymm5,%ymm5 | 
 | 	vpsrld	$25,%ymm4,%ymm8 | 
 | 	vpslld	$32-25,%ymm4,%ymm4 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	0+128(%rbp),%ymm8 | 
 | 	vpalignr	$4,%ymm7,%ymm7,%ymm7 | 
 | 	vpalignr	$8,%ymm11,%ymm11,%ymm11 | 
 | 	vpalignr	$12,%ymm15,%ymm15,%ymm15 | 
 | 	vpalignr	$4,%ymm6,%ymm6,%ymm6 | 
 | 	vpalignr	$8,%ymm10,%ymm10,%ymm10 | 
 | 	vpalignr	$12,%ymm14,%ymm14,%ymm14 | 
 | 	vpalignr	$4,%ymm5,%ymm5,%ymm5 | 
 | 	vpalignr	$8,%ymm9,%ymm9,%ymm9 | 
 | 	vpalignr	$12,%ymm13,%ymm13,%ymm13 | 
 | 	vpalignr	$4,%ymm4,%ymm4,%ymm4 | 
 | 	vpalignr	$8,%ymm8,%ymm8,%ymm8 | 
 | 	vpalignr	$12,%ymm12,%ymm12,%ymm12 | 
 | 	vmovdqa	%ymm8,0+128(%rbp) | 
 | 	vmovdqa	.Lrol16(%rip),%ymm8 | 
 | 	vpaddd	%ymm7,%ymm3,%ymm3 | 
 | 	addq	0+16(%r8),%r10 | 
 | 	adcq	8+16(%r8),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rdx | 
 | 	movq	%rdx,%r15 | 
 | 	mulxq	%r10,%r13,%r14 | 
 | 	mulxq	%r11,%rax,%rdx | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rdx | 
 | 	mulxq	%r10,%r10,%rax | 
 | 	addq	%r10,%r14 | 
 | 	mulxq	%r11,%r11,%r9 | 
 | 	adcq	%r11,%r15 | 
 | 	adcq	$0,%r9 | 
 | 	imulq	%r12,%rdx | 
 | 	addq	%rax,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 |  | 
 | 	leaq	32(%r8),%r8 | 
 | 	vpaddd	%ymm6,%ymm2,%ymm2 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm3,%ymm15,%ymm15 | 
 | 	vpxor	%ymm2,%ymm14,%ymm14 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	%ymm8,%ymm15,%ymm15 | 
 | 	vpshufb	%ymm8,%ymm14,%ymm14 | 
 | 	vpshufb	%ymm8,%ymm13,%ymm13 | 
 | 	vpshufb	%ymm8,%ymm12,%ymm12 | 
 | 	vpaddd	%ymm15,%ymm11,%ymm11 | 
 | 	vpaddd	%ymm14,%ymm10,%ymm10 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpaddd	0+128(%rbp),%ymm12,%ymm8 | 
 | 	vpxor	%ymm11,%ymm7,%ymm7 | 
 | 	vpxor	%ymm10,%ymm6,%ymm6 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	%ymm8,0+128(%rbp) | 
 | 	vpsrld	$20,%ymm7,%ymm8 | 
 | 	vpslld	$32-20,%ymm7,%ymm7 | 
 | 	vpxor	%ymm8,%ymm7,%ymm7 | 
 | 	vpsrld	$20,%ymm6,%ymm8 | 
 | 	vpslld	$32-20,%ymm6,%ymm6 | 
 | 	vpxor	%ymm8,%ymm6,%ymm6 | 
 | 	vpsrld	$20,%ymm5,%ymm8 | 
 | 	vpslld	$32-20,%ymm5,%ymm5 | 
 | 	vpxor	%ymm8,%ymm5,%ymm5 | 
 | 	vpsrld	$20,%ymm4,%ymm8 | 
 | 	vpslld	$32-20,%ymm4,%ymm4 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	.Lrol8(%rip),%ymm8 | 
 | 	vpaddd	%ymm7,%ymm3,%ymm3 | 
 | 	vpaddd	%ymm6,%ymm2,%ymm2 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm3,%ymm15,%ymm15 | 
 | 	vpxor	%ymm2,%ymm14,%ymm14 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	%ymm8,%ymm15,%ymm15 | 
 | 	vpshufb	%ymm8,%ymm14,%ymm14 | 
 | 	vpshufb	%ymm8,%ymm13,%ymm13 | 
 | 	vpshufb	%ymm8,%ymm12,%ymm12 | 
 | 	vpaddd	%ymm15,%ymm11,%ymm11 | 
 | 	vpaddd	%ymm14,%ymm10,%ymm10 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpaddd	0+128(%rbp),%ymm12,%ymm8 | 
 | 	vpxor	%ymm11,%ymm7,%ymm7 | 
 | 	vpxor	%ymm10,%ymm6,%ymm6 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	%ymm8,0+128(%rbp) | 
 | 	vpsrld	$25,%ymm7,%ymm8 | 
 | 	vpslld	$32-25,%ymm7,%ymm7 | 
 | 	vpxor	%ymm8,%ymm7,%ymm7 | 
 | 	vpsrld	$25,%ymm6,%ymm8 | 
 | 	vpslld	$32-25,%ymm6,%ymm6 | 
 | 	vpxor	%ymm8,%ymm6,%ymm6 | 
 | 	vpsrld	$25,%ymm5,%ymm8 | 
 | 	vpslld	$32-25,%ymm5,%ymm5 | 
 | 	vpxor	%ymm8,%ymm5,%ymm5 | 
 | 	vpsrld	$25,%ymm4,%ymm8 | 
 | 	vpslld	$32-25,%ymm4,%ymm4 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	0+128(%rbp),%ymm8 | 
 | 	vpalignr	$12,%ymm7,%ymm7,%ymm7 | 
 | 	vpalignr	$8,%ymm11,%ymm11,%ymm11 | 
 | 	vpalignr	$4,%ymm15,%ymm15,%ymm15 | 
 | 	vpalignr	$12,%ymm6,%ymm6,%ymm6 | 
 | 	vpalignr	$8,%ymm10,%ymm10,%ymm10 | 
 | 	vpalignr	$4,%ymm14,%ymm14,%ymm14 | 
 | 	vpalignr	$12,%ymm5,%ymm5,%ymm5 | 
 | 	vpalignr	$8,%ymm9,%ymm9,%ymm9 | 
 | 	vpalignr	$4,%ymm13,%ymm13,%ymm13 | 
 | 	vpalignr	$12,%ymm4,%ymm4,%ymm4 | 
 | 	vpalignr	$8,%ymm8,%ymm8,%ymm8 | 
 | 	vpalignr	$4,%ymm12,%ymm12,%ymm12 | 
 |  | 
 | 	incq	%rcx | 
 | 	cmpq	$4,%rcx | 
 | 	jl	.Lopen_avx2_tail_512_rounds_and_x2hash | 
 | 	cmpq	$10,%rcx | 
 | 	jne	.Lopen_avx2_tail_512_rounds_and_x1hash | 
 | 	movq	%rbx,%rcx | 
 | 	subq	$384,%rcx | 
 | 	andq	$-16,%rcx | 
 | .Lopen_avx2_tail_512_hash: | 
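// Hash whatever whole 16-byte blocks of this tail are still outstanding.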
 | 	testq	%rcx,%rcx | 
 | 	je	.Lopen_avx2_tail_512_done | 
 | 	addq	0+0(%r8),%r10 | 
 | 	adcq	8+0(%r8),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rdx | 
 | 	movq	%rdx,%r15 | 
 | 	mulxq	%r10,%r13,%r14 | 
 | 	mulxq	%r11,%rax,%rdx | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rdx | 
 | 	mulxq	%r10,%r10,%rax | 
 | 	addq	%r10,%r14 | 
 | 	mulxq	%r11,%r11,%r9 | 
 | 	adcq	%r11,%r15 | 
 | 	adcq	$0,%r9 | 
 | 	imulq	%r12,%rdx | 
 | 	addq	%rax,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 |  | 
 | 	leaq	16(%r8),%r8 | 
 | 	subq	$16,%rcx | 
 | 	jmp	.Lopen_avx2_tail_512_hash | 
 | .Lopen_avx2_tail_512_done: | 
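// Feed-forward all four states, XOR the first 384 bytes, and leave the remaining
// 128 bytes of keystream in ymm0/ymm4/ymm8/ymm12 for the shared tail XOR below.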
 | 	vpaddd	.Lchacha20_consts(%rip),%ymm3,%ymm3 | 
 | 	vpaddd	0+64(%rbp),%ymm7,%ymm7 | 
 | 	vpaddd	0+96(%rbp),%ymm11,%ymm11 | 
 | 	vpaddd	0+256(%rbp),%ymm15,%ymm15 | 
 | 	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2 | 
 | 	vpaddd	0+64(%rbp),%ymm6,%ymm6 | 
 | 	vpaddd	0+96(%rbp),%ymm10,%ymm10 | 
 | 	vpaddd	0+224(%rbp),%ymm14,%ymm14 | 
 | 	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1 | 
 | 	vpaddd	0+64(%rbp),%ymm5,%ymm5 | 
 | 	vpaddd	0+96(%rbp),%ymm9,%ymm9 | 
 | 	vpaddd	0+192(%rbp),%ymm13,%ymm13 | 
 | 	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0 | 
 | 	vpaddd	0+64(%rbp),%ymm4,%ymm4 | 
 | 	vpaddd	0+96(%rbp),%ymm8,%ymm8 | 
 | 	vpaddd	0+160(%rbp),%ymm12,%ymm12 | 
 |  | 
 | 	vmovdqa	%ymm0,0+128(%rbp) | 
 | 	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0 | 
 | 	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7 | 
 | 	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3 | 
 | 	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11 | 
 | 	vpxor	0+0(%rsi),%ymm0,%ymm0 | 
 | 	vpxor	32+0(%rsi),%ymm3,%ymm3 | 
 | 	vpxor	64+0(%rsi),%ymm7,%ymm7 | 
 | 	vpxor	96+0(%rsi),%ymm11,%ymm11 | 
 | 	vmovdqu	%ymm0,0+0(%rdi) | 
 | 	vmovdqu	%ymm3,32+0(%rdi) | 
 | 	vmovdqu	%ymm7,64+0(%rdi) | 
 | 	vmovdqu	%ymm11,96+0(%rdi) | 
 |  | 
 | 	vmovdqa	0+128(%rbp),%ymm0 | 
 | 	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3 | 
 | 	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6 | 
 | 	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2 | 
 | 	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10 | 
 | 	vpxor	0+128(%rsi),%ymm3,%ymm3 | 
 | 	vpxor	32+128(%rsi),%ymm2,%ymm2 | 
 | 	vpxor	64+128(%rsi),%ymm6,%ymm6 | 
 | 	vpxor	96+128(%rsi),%ymm10,%ymm10 | 
 | 	vmovdqu	%ymm3,0+128(%rdi) | 
 | 	vmovdqu	%ymm2,32+128(%rdi) | 
 | 	vmovdqu	%ymm6,64+128(%rdi) | 
 | 	vmovdqu	%ymm10,96+128(%rdi) | 
 | 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3 | 
 | 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5 | 
 | 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1 | 
 | 	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9 | 
 | 	vpxor	0+256(%rsi),%ymm3,%ymm3 | 
 | 	vpxor	32+256(%rsi),%ymm1,%ymm1 | 
 | 	vpxor	64+256(%rsi),%ymm5,%ymm5 | 
 | 	vpxor	96+256(%rsi),%ymm9,%ymm9 | 
 | 	vmovdqu	%ymm3,0+256(%rdi) | 
 | 	vmovdqu	%ymm1,32+256(%rdi) | 
 | 	vmovdqu	%ymm5,64+256(%rdi) | 
 | 	vmovdqu	%ymm9,96+256(%rdi) | 
 | 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3 | 
 | 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0 | 
 | 	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4 | 
 | 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12 | 
 | 	vmovdqa	%ymm3,%ymm8 | 
 |  | 
 | 	leaq	384(%rsi),%rsi | 
 | 	leaq	384(%rdi),%rdi | 
 | 	subq	$384,%rbx | 
 | .Lopen_avx2_tail_128_xor: | 
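// Shared tail: XOR 32 bytes of keystream at a time, rotating ymm4->ymm0, ymm8->ymm4,
// ymm12->ymm8, then one final 16-byte block; anything under 16 bytes is finished by
// the SSE tail code with the leftover keystream held in xmm1.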
 | 	cmpq	$32,%rbx | 
 | 	jb	.Lopen_avx2_tail_32_xor | 
 | 	subq	$32,%rbx | 
 | 	vpxor	(%rsi),%ymm0,%ymm0 | 
 | 	vmovdqu	%ymm0,(%rdi) | 
 | 	leaq	32(%rsi),%rsi | 
 | 	leaq	32(%rdi),%rdi | 
 | 	vmovdqa	%ymm4,%ymm0 | 
 | 	vmovdqa	%ymm8,%ymm4 | 
 | 	vmovdqa	%ymm12,%ymm8 | 
 | 	jmp	.Lopen_avx2_tail_128_xor | 
 | .Lopen_avx2_tail_32_xor: | 
 | 	cmpq	$16,%rbx | 
 | 	vmovdqa	%xmm0,%xmm1 | 
 | 	jb	.Lopen_avx2_exit | 
 | 	subq	$16,%rbx | 
 |  | 
 | 	vpxor	(%rsi),%xmm0,%xmm1 | 
 | 	vmovdqu	%xmm1,(%rdi) | 
 | 	leaq	16(%rsi),%rsi | 
 | 	leaq	16(%rdi),%rdi | 
 | 	vperm2i128	$0x11,%ymm0,%ymm0,%ymm0 | 
 | 	vmovdqa	%xmm0,%xmm1 | 
 | .Lopen_avx2_exit: | 
 | 	vzeroupper | 
 | 	jmp	.Lopen_sse_tail_16 | 
 |  | 
 | .Lopen_avx2_192: | 
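// Total length at most 192: two 2-block states give 192 bytes of keystream; the first
// 32 bytes of block 0, clamped below, become the Poly1305 key r||s.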
 | 	vmovdqa	%ymm0,%ymm1 | 
 | 	vmovdqa	%ymm0,%ymm2 | 
 | 	vmovdqa	%ymm4,%ymm5 | 
 | 	vmovdqa	%ymm4,%ymm6 | 
 | 	vmovdqa	%ymm8,%ymm9 | 
 | 	vmovdqa	%ymm8,%ymm10 | 
 | 	vpaddd	.Lavx2_inc(%rip),%ymm12,%ymm13 | 
 | 	vmovdqa	%ymm12,%ymm11 | 
 | 	vmovdqa	%ymm13,%ymm15 | 
 | 	movq	$10,%r10 | 
 | .Lopen_avx2_192_rounds: | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	.Lrol16(%rip),%ymm12,%ymm12 | 
 | 	vpaddd	%ymm12,%ymm8,%ymm8 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vpsrld	$20,%ymm4,%ymm3 | 
 | 	vpslld	$12,%ymm4,%ymm4 | 
 | 	vpxor	%ymm3,%ymm4,%ymm4 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	.Lrol8(%rip),%ymm12,%ymm12 | 
 | 	vpaddd	%ymm12,%ymm8,%ymm8 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vpslld	$7,%ymm4,%ymm3 | 
 | 	vpsrld	$25,%ymm4,%ymm4 | 
 | 	vpxor	%ymm3,%ymm4,%ymm4 | 
 | 	vpalignr	$12,%ymm12,%ymm12,%ymm12 | 
 | 	vpalignr	$8,%ymm8,%ymm8,%ymm8 | 
 | 	vpalignr	$4,%ymm4,%ymm4,%ymm4 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpshufb	.Lrol16(%rip),%ymm13,%ymm13 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpsrld	$20,%ymm5,%ymm3 | 
 | 	vpslld	$12,%ymm5,%ymm5 | 
 | 	vpxor	%ymm3,%ymm5,%ymm5 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpshufb	.Lrol8(%rip),%ymm13,%ymm13 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpslld	$7,%ymm5,%ymm3 | 
 | 	vpsrld	$25,%ymm5,%ymm5 | 
 | 	vpxor	%ymm3,%ymm5,%ymm5 | 
 | 	vpalignr	$12,%ymm13,%ymm13,%ymm13 | 
 | 	vpalignr	$8,%ymm9,%ymm9,%ymm9 | 
 | 	vpalignr	$4,%ymm5,%ymm5,%ymm5 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	.Lrol16(%rip),%ymm12,%ymm12 | 
 | 	vpaddd	%ymm12,%ymm8,%ymm8 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vpsrld	$20,%ymm4,%ymm3 | 
 | 	vpslld	$12,%ymm4,%ymm4 | 
 | 	vpxor	%ymm3,%ymm4,%ymm4 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	.Lrol8(%rip),%ymm12,%ymm12 | 
 | 	vpaddd	%ymm12,%ymm8,%ymm8 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vpslld	$7,%ymm4,%ymm3 | 
 | 	vpsrld	$25,%ymm4,%ymm4 | 
 | 	vpxor	%ymm3,%ymm4,%ymm4 | 
 | 	vpalignr	$4,%ymm12,%ymm12,%ymm12 | 
 | 	vpalignr	$8,%ymm8,%ymm8,%ymm8 | 
 | 	vpalignr	$12,%ymm4,%ymm4,%ymm4 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpshufb	.Lrol16(%rip),%ymm13,%ymm13 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpsrld	$20,%ymm5,%ymm3 | 
 | 	vpslld	$12,%ymm5,%ymm5 | 
 | 	vpxor	%ymm3,%ymm5,%ymm5 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpshufb	.Lrol8(%rip),%ymm13,%ymm13 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpslld	$7,%ymm5,%ymm3 | 
 | 	vpsrld	$25,%ymm5,%ymm5 | 
 | 	vpxor	%ymm3,%ymm5,%ymm5 | 
 | 	vpalignr	$4,%ymm13,%ymm13,%ymm13 | 
 | 	vpalignr	$8,%ymm9,%ymm9,%ymm9 | 
 | 	vpalignr	$12,%ymm5,%ymm5,%ymm5 | 
 |  | 
 | 	decq	%r10 | 
 | 	jne	.Lopen_avx2_192_rounds | 
 | 	vpaddd	%ymm2,%ymm0,%ymm0 | 
 | 	vpaddd	%ymm2,%ymm1,%ymm1 | 
 | 	vpaddd	%ymm6,%ymm4,%ymm4 | 
 | 	vpaddd	%ymm6,%ymm5,%ymm5 | 
 | 	vpaddd	%ymm10,%ymm8,%ymm8 | 
 | 	vpaddd	%ymm10,%ymm9,%ymm9 | 
 | 	vpaddd	%ymm11,%ymm12,%ymm12 | 
 | 	vpaddd	%ymm15,%ymm13,%ymm13 | 
 | 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3 | 
 |  | 
 | 	vpand	.Lclamp(%rip),%ymm3,%ymm3 | 
 | 	vmovdqa	%ymm3,0+0(%rbp) | 
 |  | 
 | 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0 | 
 | 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4 | 
 | 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8 | 
 | 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12 | 
 | 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1 | 
 | 	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5 | 
 | .Lopen_avx2_short: | 
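// Short-input open: hash the AD, then per iteration hash two 16-byte ciphertext
// blocks and XOR 32 bytes of keystream, rotating the queued keystream registers.
// The mov of %r8 to itself looks like a generator no-op; the AD length is already
// in %r8.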
 | 	movq	%r8,%r8 | 
 | 	call	poly_hash_ad_internal | 
 | .Lopen_avx2_short_hash_and_xor_loop: | 
 | 	cmpq	$32,%rbx | 
 | 	jb	.Lopen_avx2_short_tail_32 | 
 | 	subq	$32,%rbx | 
 | 	addq	0+0(%rsi),%r10 | 
 | 	adcq	8+0(%rsi),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 | 	addq	0+16(%rsi),%r10 | 
 | 	adcq	8+16(%rsi),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 |  | 
 |  | 
 | 	vpxor	(%rsi),%ymm0,%ymm0 | 
 | 	vmovdqu	%ymm0,(%rdi) | 
 | 	leaq	32(%rsi),%rsi | 
 | 	leaq	32(%rdi),%rdi | 
 |  | 
 | 	vmovdqa	%ymm4,%ymm0 | 
 | 	vmovdqa	%ymm8,%ymm4 | 
 | 	vmovdqa	%ymm12,%ymm8 | 
 | 	vmovdqa	%ymm1,%ymm12 | 
 | 	vmovdqa	%ymm5,%ymm1 | 
 | 	vmovdqa	%ymm9,%ymm5 | 
 | 	vmovdqa	%ymm13,%ymm9 | 
 | 	vmovdqa	%ymm2,%ymm13 | 
 | 	vmovdqa	%ymm6,%ymm2 | 
 | 	jmp	.Lopen_avx2_short_hash_and_xor_loop | 
 | .Lopen_avx2_short_tail_32: | 
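// 16 to 31 bytes left: hash and XOR one more 16-byte block; the high lane of ymm0 is
// kept in xmm1 for the final partial block.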
 | 	cmpq	$16,%rbx | 
 | 	vmovdqa	%xmm0,%xmm1 | 
 | 	jb	.Lopen_avx2_short_tail_32_exit | 
 | 	subq	$16,%rbx | 
 | 	addq	0+0(%rsi),%r10 | 
 | 	adcq	8+0(%rsi),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 |  | 
 | 	vpxor	(%rsi),%xmm0,%xmm3 | 
 | 	vmovdqu	%xmm3,(%rdi) | 
 | 	leaq	16(%rsi),%rsi | 
 | 	leaq	16(%rdi),%rdi | 
 | 	vextracti128	$1,%ymm0,%xmm1 | 
 | .Lopen_avx2_short_tail_32_exit: | 
 | 	vzeroupper | 
 | 	jmp	.Lopen_sse_tail_16 | 
 |  | 
 | .Lopen_avx2_320: | 
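// Total length at most 320: three 2-block states give 320 bytes of keystream. As in
// the 192-byte path, the clamped first 32 bytes of block 0 form the Poly1305 key,
// and the short open path above is reused for hashing and XOR.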
 | 	vmovdqa	%ymm0,%ymm1 | 
 | 	vmovdqa	%ymm0,%ymm2 | 
 | 	vmovdqa	%ymm4,%ymm5 | 
 | 	vmovdqa	%ymm4,%ymm6 | 
 | 	vmovdqa	%ymm8,%ymm9 | 
 | 	vmovdqa	%ymm8,%ymm10 | 
 | 	vpaddd	.Lavx2_inc(%rip),%ymm12,%ymm13 | 
 | 	vpaddd	.Lavx2_inc(%rip),%ymm13,%ymm14 | 
 | 	vmovdqa	%ymm4,%ymm7 | 
 | 	vmovdqa	%ymm8,%ymm11 | 
 | 	vmovdqa	%ymm12,0+160(%rbp) | 
 | 	vmovdqa	%ymm13,0+192(%rbp) | 
 | 	vmovdqa	%ymm14,0+224(%rbp) | 
 | 	movq	$10,%r10 | 
 | .Lopen_avx2_320_rounds: | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	.Lrol16(%rip),%ymm12,%ymm12 | 
 | 	vpaddd	%ymm12,%ymm8,%ymm8 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vpsrld	$20,%ymm4,%ymm3 | 
 | 	vpslld	$12,%ymm4,%ymm4 | 
 | 	vpxor	%ymm3,%ymm4,%ymm4 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	.Lrol8(%rip),%ymm12,%ymm12 | 
 | 	vpaddd	%ymm12,%ymm8,%ymm8 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vpslld	$7,%ymm4,%ymm3 | 
 | 	vpsrld	$25,%ymm4,%ymm4 | 
 | 	vpxor	%ymm3,%ymm4,%ymm4 | 
 | 	vpalignr	$12,%ymm12,%ymm12,%ymm12 | 
 | 	vpalignr	$8,%ymm8,%ymm8,%ymm8 | 
 | 	vpalignr	$4,%ymm4,%ymm4,%ymm4 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpshufb	.Lrol16(%rip),%ymm13,%ymm13 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpsrld	$20,%ymm5,%ymm3 | 
 | 	vpslld	$12,%ymm5,%ymm5 | 
 | 	vpxor	%ymm3,%ymm5,%ymm5 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpshufb	.Lrol8(%rip),%ymm13,%ymm13 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpslld	$7,%ymm5,%ymm3 | 
 | 	vpsrld	$25,%ymm5,%ymm5 | 
 | 	vpxor	%ymm3,%ymm5,%ymm5 | 
 | 	vpalignr	$12,%ymm13,%ymm13,%ymm13 | 
 | 	vpalignr	$8,%ymm9,%ymm9,%ymm9 | 
 | 	vpalignr	$4,%ymm5,%ymm5,%ymm5 | 
 | 	vpaddd	%ymm6,%ymm2,%ymm2 | 
 | 	vpxor	%ymm2,%ymm14,%ymm14 | 
 | 	vpshufb	.Lrol16(%rip),%ymm14,%ymm14 | 
 | 	vpaddd	%ymm14,%ymm10,%ymm10 | 
 | 	vpxor	%ymm10,%ymm6,%ymm6 | 
 | 	vpsrld	$20,%ymm6,%ymm3 | 
 | 	vpslld	$12,%ymm6,%ymm6 | 
 | 	vpxor	%ymm3,%ymm6,%ymm6 | 
 | 	vpaddd	%ymm6,%ymm2,%ymm2 | 
 | 	vpxor	%ymm2,%ymm14,%ymm14 | 
 | 	vpshufb	.Lrol8(%rip),%ymm14,%ymm14 | 
 | 	vpaddd	%ymm14,%ymm10,%ymm10 | 
 | 	vpxor	%ymm10,%ymm6,%ymm6 | 
 | 	vpslld	$7,%ymm6,%ymm3 | 
 | 	vpsrld	$25,%ymm6,%ymm6 | 
 | 	vpxor	%ymm3,%ymm6,%ymm6 | 
 | 	vpalignr	$12,%ymm14,%ymm14,%ymm14 | 
 | 	vpalignr	$8,%ymm10,%ymm10,%ymm10 | 
 | 	vpalignr	$4,%ymm6,%ymm6,%ymm6 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	.Lrol16(%rip),%ymm12,%ymm12 | 
 | 	vpaddd	%ymm12,%ymm8,%ymm8 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vpsrld	$20,%ymm4,%ymm3 | 
 | 	vpslld	$12,%ymm4,%ymm4 | 
 | 	vpxor	%ymm3,%ymm4,%ymm4 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	.Lrol8(%rip),%ymm12,%ymm12 | 
 | 	vpaddd	%ymm12,%ymm8,%ymm8 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vpslld	$7,%ymm4,%ymm3 | 
 | 	vpsrld	$25,%ymm4,%ymm4 | 
 | 	vpxor	%ymm3,%ymm4,%ymm4 | 
 | 	vpalignr	$4,%ymm12,%ymm12,%ymm12 | 
 | 	vpalignr	$8,%ymm8,%ymm8,%ymm8 | 
 | 	vpalignr	$12,%ymm4,%ymm4,%ymm4 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpshufb	.Lrol16(%rip),%ymm13,%ymm13 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpsrld	$20,%ymm5,%ymm3 | 
 | 	vpslld	$12,%ymm5,%ymm5 | 
 | 	vpxor	%ymm3,%ymm5,%ymm5 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpshufb	.Lrol8(%rip),%ymm13,%ymm13 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpslld	$7,%ymm5,%ymm3 | 
 | 	vpsrld	$25,%ymm5,%ymm5 | 
 | 	vpxor	%ymm3,%ymm5,%ymm5 | 
 | 	vpalignr	$4,%ymm13,%ymm13,%ymm13 | 
 | 	vpalignr	$8,%ymm9,%ymm9,%ymm9 | 
 | 	vpalignr	$12,%ymm5,%ymm5,%ymm5 | 
 | 	vpaddd	%ymm6,%ymm2,%ymm2 | 
 | 	vpxor	%ymm2,%ymm14,%ymm14 | 
 | 	vpshufb	.Lrol16(%rip),%ymm14,%ymm14 | 
 | 	vpaddd	%ymm14,%ymm10,%ymm10 | 
 | 	vpxor	%ymm10,%ymm6,%ymm6 | 
 | 	vpsrld	$20,%ymm6,%ymm3 | 
 | 	vpslld	$12,%ymm6,%ymm6 | 
 | 	vpxor	%ymm3,%ymm6,%ymm6 | 
 | 	vpaddd	%ymm6,%ymm2,%ymm2 | 
 | 	vpxor	%ymm2,%ymm14,%ymm14 | 
 | 	vpshufb	.Lrol8(%rip),%ymm14,%ymm14 | 
 | 	vpaddd	%ymm14,%ymm10,%ymm10 | 
 | 	vpxor	%ymm10,%ymm6,%ymm6 | 
 | 	vpslld	$7,%ymm6,%ymm3 | 
 | 	vpsrld	$25,%ymm6,%ymm6 | 
 | 	vpxor	%ymm3,%ymm6,%ymm6 | 
 | 	vpalignr	$4,%ymm14,%ymm14,%ymm14 | 
 | 	vpalignr	$8,%ymm10,%ymm10,%ymm10 | 
 | 	vpalignr	$12,%ymm6,%ymm6,%ymm6 | 
 |  | 
 | 	decq	%r10 | 
 | 	jne	.Lopen_avx2_320_rounds | 
 | 	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0 | 
 | 	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1 | 
 | 	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2 | 
 | 	vpaddd	%ymm7,%ymm4,%ymm4 | 
 | 	vpaddd	%ymm7,%ymm5,%ymm5 | 
 | 	vpaddd	%ymm7,%ymm6,%ymm6 | 
 | 	vpaddd	%ymm11,%ymm8,%ymm8 | 
 | 	vpaddd	%ymm11,%ymm9,%ymm9 | 
 | 	vpaddd	%ymm11,%ymm10,%ymm10 | 
 | 	vpaddd	0+160(%rbp),%ymm12,%ymm12 | 
 | 	vpaddd	0+192(%rbp),%ymm13,%ymm13 | 
 | 	vpaddd	0+224(%rbp),%ymm14,%ymm14 | 
 | 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3 | 
 |  | 
 | 	vpand	.Lclamp(%rip),%ymm3,%ymm3 | 
 | 	vmovdqa	%ymm3,0+0(%rbp) | 
 |  | 
 | 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0 | 
 | 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4 | 
 | 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8 | 
 | 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12 | 
 | 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1 | 
 | 	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5 | 
 | 	vperm2i128	$0x02,%ymm2,%ymm6,%ymm9 | 
 | 	vperm2i128	$0x02,%ymm10,%ymm14,%ymm13 | 
 | 	vperm2i128	$0x13,%ymm2,%ymm6,%ymm2 | 
 | 	vperm2i128	$0x13,%ymm10,%ymm14,%ymm6 | 
 | 	jmp	.Lopen_avx2_short | 
 | .size	chacha20_poly1305_open_avx2, .-chacha20_poly1305_open_avx2 | 
 | .cfi_endproc	 | 
 |  | 
 |  | 
 | .globl	chacha20_poly1305_seal_avx2 | 
 | .hidden chacha20_poly1305_seal_avx2 | 
 | .type	chacha20_poly1305_seal_avx2,@function | 
 | .align	64 | 
 | chacha20_poly1305_seal_avx2: | 
 | .cfi_startproc	 | 
 | _CET_ENDBR | 
 | 	pushq	%rbp | 
 | .cfi_adjust_cfa_offset	8 | 
 | .cfi_offset	%rbp,-16 | 
 | 	pushq	%rbx | 
 | .cfi_adjust_cfa_offset	8 | 
 | .cfi_offset	%rbx,-24 | 
 | 	pushq	%r12 | 
 | .cfi_adjust_cfa_offset	8 | 
 | .cfi_offset	%r12,-32 | 
 | 	pushq	%r13 | 
 | .cfi_adjust_cfa_offset	8 | 
 | .cfi_offset	%r13,-40 | 
 | 	pushq	%r14 | 
 | .cfi_adjust_cfa_offset	8 | 
 | .cfi_offset	%r14,-48 | 
 | 	pushq	%r15 | 
 | .cfi_adjust_cfa_offset	8 | 
 | .cfi_offset	%r15,-56 | 
 |  | 
 |  | 
 | 	pushq	%r9 | 
 | .cfi_adjust_cfa_offset	8 | 
 | .cfi_offset	%r9,-64 | 
 | 	subq	$288 + 0 + 32,%rsp | 
 | .cfi_adjust_cfa_offset	288 + 32 | 
 | 	leaq	32(%rsp),%rbp | 
 | 	andq	$-32,%rbp | 
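// Seal entry: %rbp now points at a 32-byte-aligned scratch area. Stash the AD length
// and the total ciphertext length (plaintext length plus the extra length taken from
// offset 56 of the data argument) for the Poly1305 length block hashed at the end.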
 |  | 
 | 	movq	56(%r9),%rbx | 
 | 	addq	%rdx,%rbx | 
 | 	movq	%r8,0+0+32(%rbp) | 
 | 	movq	%rbx,8+0+32(%rbp) | 
 | 	movq	%rdx,%rbx | 
 |  | 
 | 	vzeroupper | 
 | 	vmovdqa	.Lchacha20_consts(%rip),%ymm0 | 
 | 	vbroadcasti128	0(%r9),%ymm4 | 
 | 	vbroadcasti128	16(%r9),%ymm8 | 
 | 	vbroadcasti128	32(%r9),%ymm12 | 
 | 	vpaddd	.Lavx2_init(%rip),%ymm12,%ymm12 | 
 | 	cmpq	$192,%rbx | 
 | 	jbe	.Lseal_avx2_192 | 
 | 	cmpq	$320,%rbx | 
 | 	jbe	.Lseal_avx2_320 | 
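// More than 320 bytes: duplicate the state into four 2-block groups, keep the key
// rows and the four counter groups in the scratch area, and run the initial rounds
// below.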
 | 	vmovdqa	%ymm0,%ymm1 | 
 | 	vmovdqa	%ymm0,%ymm2 | 
 | 	vmovdqa	%ymm0,%ymm3 | 
 | 	vmovdqa	%ymm4,%ymm5 | 
 | 	vmovdqa	%ymm4,%ymm6 | 
 | 	vmovdqa	%ymm4,%ymm7 | 
 | 	vmovdqa	%ymm4,0+64(%rbp) | 
 | 	vmovdqa	%ymm8,%ymm9 | 
 | 	vmovdqa	%ymm8,%ymm10 | 
 | 	vmovdqa	%ymm8,%ymm11 | 
 | 	vmovdqa	%ymm8,0+96(%rbp) | 
 | 	vmovdqa	%ymm12,%ymm15 | 
 | 	vpaddd	.Lavx2_inc(%rip),%ymm15,%ymm14 | 
 | 	vpaddd	.Lavx2_inc(%rip),%ymm14,%ymm13 | 
 | 	vpaddd	.Lavx2_inc(%rip),%ymm13,%ymm12 | 
 | 	vmovdqa	%ymm12,0+160(%rbp) | 
 | 	vmovdqa	%ymm13,0+192(%rbp) | 
 | 	vmovdqa	%ymm14,0+224(%rbp) | 
 | 	vmovdqa	%ymm15,0+256(%rbp) | 
 | 	movq	$10,%r10 | 
 | .Lseal_avx2_init_rounds: | 
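// One ChaCha20 double round across all four groups per iteration; no Poly1305 yet,
// since its key is presumably taken from the first keystream block once these rounds
// finish.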
 | 	vmovdqa	%ymm8,0+128(%rbp) | 
 | 	vmovdqa	.Lrol16(%rip),%ymm8 | 
 | 	vpaddd	%ymm7,%ymm3,%ymm3 | 
 | 	vpaddd	%ymm6,%ymm2,%ymm2 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm3,%ymm15,%ymm15 | 
 | 	vpxor	%ymm2,%ymm14,%ymm14 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	%ymm8,%ymm15,%ymm15 | 
 | 	vpshufb	%ymm8,%ymm14,%ymm14 | 
 | 	vpshufb	%ymm8,%ymm13,%ymm13 | 
 | 	vpshufb	%ymm8,%ymm12,%ymm12 | 
 | 	vpaddd	%ymm15,%ymm11,%ymm11 | 
 | 	vpaddd	%ymm14,%ymm10,%ymm10 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpaddd	0+128(%rbp),%ymm12,%ymm8 | 
 | 	vpxor	%ymm11,%ymm7,%ymm7 | 
 | 	vpxor	%ymm10,%ymm6,%ymm6 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	%ymm8,0+128(%rbp) | 
 | 	vpsrld	$20,%ymm7,%ymm8 | 
 | 	vpslld	$32-20,%ymm7,%ymm7 | 
 | 	vpxor	%ymm8,%ymm7,%ymm7 | 
 | 	vpsrld	$20,%ymm6,%ymm8 | 
 | 	vpslld	$32-20,%ymm6,%ymm6 | 
 | 	vpxor	%ymm8,%ymm6,%ymm6 | 
 | 	vpsrld	$20,%ymm5,%ymm8 | 
 | 	vpslld	$32-20,%ymm5,%ymm5 | 
 | 	vpxor	%ymm8,%ymm5,%ymm5 | 
 | 	vpsrld	$20,%ymm4,%ymm8 | 
 | 	vpslld	$32-20,%ymm4,%ymm4 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	.Lrol8(%rip),%ymm8 | 
 | 	vpaddd	%ymm7,%ymm3,%ymm3 | 
 | 	vpaddd	%ymm6,%ymm2,%ymm2 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm3,%ymm15,%ymm15 | 
 | 	vpxor	%ymm2,%ymm14,%ymm14 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	%ymm8,%ymm15,%ymm15 | 
 | 	vpshufb	%ymm8,%ymm14,%ymm14 | 
 | 	vpshufb	%ymm8,%ymm13,%ymm13 | 
 | 	vpshufb	%ymm8,%ymm12,%ymm12 | 
 | 	vpaddd	%ymm15,%ymm11,%ymm11 | 
 | 	vpaddd	%ymm14,%ymm10,%ymm10 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpaddd	0+128(%rbp),%ymm12,%ymm8 | 
 | 	vpxor	%ymm11,%ymm7,%ymm7 | 
 | 	vpxor	%ymm10,%ymm6,%ymm6 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	%ymm8,0+128(%rbp) | 
 | 	vpsrld	$25,%ymm7,%ymm8 | 
 | 	vpslld	$32-25,%ymm7,%ymm7 | 
 | 	vpxor	%ymm8,%ymm7,%ymm7 | 
 | 	vpsrld	$25,%ymm6,%ymm8 | 
 | 	vpslld	$32-25,%ymm6,%ymm6 | 
 | 	vpxor	%ymm8,%ymm6,%ymm6 | 
 | 	vpsrld	$25,%ymm5,%ymm8 | 
 | 	vpslld	$32-25,%ymm5,%ymm5 | 
 | 	vpxor	%ymm8,%ymm5,%ymm5 | 
 | 	vpsrld	$25,%ymm4,%ymm8 | 
 | 	vpslld	$32-25,%ymm4,%ymm4 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	0+128(%rbp),%ymm8 | 
 | 	vpalignr	$4,%ymm7,%ymm7,%ymm7 | 
 | 	vpalignr	$8,%ymm11,%ymm11,%ymm11 | 
 | 	vpalignr	$12,%ymm15,%ymm15,%ymm15 | 
 | 	vpalignr	$4,%ymm6,%ymm6,%ymm6 | 
 | 	vpalignr	$8,%ymm10,%ymm10,%ymm10 | 
 | 	vpalignr	$12,%ymm14,%ymm14,%ymm14 | 
 | 	vpalignr	$4,%ymm5,%ymm5,%ymm5 | 
 | 	vpalignr	$8,%ymm9,%ymm9,%ymm9 | 
 | 	vpalignr	$12,%ymm13,%ymm13,%ymm13 | 
 | 	vpalignr	$4,%ymm4,%ymm4,%ymm4 | 
 | 	vpalignr	$8,%ymm8,%ymm8,%ymm8 | 
 | 	vpalignr	$12,%ymm12,%ymm12,%ymm12 | 
 | 	vmovdqa	%ymm8,0+128(%rbp) | 
 | 	vmovdqa	.Lrol16(%rip),%ymm8 | 
 | 	vpaddd	%ymm7,%ymm3,%ymm3 | 
 | 	vpaddd	%ymm6,%ymm2,%ymm2 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm3,%ymm15,%ymm15 | 
 | 	vpxor	%ymm2,%ymm14,%ymm14 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	%ymm8,%ymm15,%ymm15 | 
 | 	vpshufb	%ymm8,%ymm14,%ymm14 | 
 | 	vpshufb	%ymm8,%ymm13,%ymm13 | 
 | 	vpshufb	%ymm8,%ymm12,%ymm12 | 
 | 	vpaddd	%ymm15,%ymm11,%ymm11 | 
 | 	vpaddd	%ymm14,%ymm10,%ymm10 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpaddd	0+128(%rbp),%ymm12,%ymm8 | 
 | 	vpxor	%ymm11,%ymm7,%ymm7 | 
 | 	vpxor	%ymm10,%ymm6,%ymm6 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	%ymm8,0+128(%rbp) | 
 | 	vpsrld	$20,%ymm7,%ymm8 | 
 | 	vpslld	$32-20,%ymm7,%ymm7 | 
 | 	vpxor	%ymm8,%ymm7,%ymm7 | 
 | 	vpsrld	$20,%ymm6,%ymm8 | 
 | 	vpslld	$32-20,%ymm6,%ymm6 | 
 | 	vpxor	%ymm8,%ymm6,%ymm6 | 
 | 	vpsrld	$20,%ymm5,%ymm8 | 
 | 	vpslld	$32-20,%ymm5,%ymm5 | 
 | 	vpxor	%ymm8,%ymm5,%ymm5 | 
 | 	vpsrld	$20,%ymm4,%ymm8 | 
 | 	vpslld	$32-20,%ymm4,%ymm4 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	.Lrol8(%rip),%ymm8 | 
 | 	vpaddd	%ymm7,%ymm3,%ymm3 | 
 | 	vpaddd	%ymm6,%ymm2,%ymm2 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm3,%ymm15,%ymm15 | 
 | 	vpxor	%ymm2,%ymm14,%ymm14 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	%ymm8,%ymm15,%ymm15 | 
 | 	vpshufb	%ymm8,%ymm14,%ymm14 | 
 | 	vpshufb	%ymm8,%ymm13,%ymm13 | 
 | 	vpshufb	%ymm8,%ymm12,%ymm12 | 
 | 	vpaddd	%ymm15,%ymm11,%ymm11 | 
 | 	vpaddd	%ymm14,%ymm10,%ymm10 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpaddd	0+128(%rbp),%ymm12,%ymm8 | 
 | 	vpxor	%ymm11,%ymm7,%ymm7 | 
 | 	vpxor	%ymm10,%ymm6,%ymm6 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	%ymm8,0+128(%rbp) | 
 | 	vpsrld	$25,%ymm7,%ymm8 | 
 | 	vpslld	$32-25,%ymm7,%ymm7 | 
 | 	vpxor	%ymm8,%ymm7,%ymm7 | 
 | 	vpsrld	$25,%ymm6,%ymm8 | 
 | 	vpslld	$32-25,%ymm6,%ymm6 | 
 | 	vpxor	%ymm8,%ymm6,%ymm6 | 
 | 	vpsrld	$25,%ymm5,%ymm8 | 
 | 	vpslld	$32-25,%ymm5,%ymm5 | 
 | 	vpxor	%ymm8,%ymm5,%ymm5 | 
 | 	vpsrld	$25,%ymm4,%ymm8 | 
 | 	vpslld	$32-25,%ymm4,%ymm4 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	0+128(%rbp),%ymm8 | 
 | 	vpalignr	$12,%ymm7,%ymm7,%ymm7 | 
 | 	vpalignr	$8,%ymm11,%ymm11,%ymm11 | 
 | 	vpalignr	$4,%ymm15,%ymm15,%ymm15 | 
 | 	vpalignr	$12,%ymm6,%ymm6,%ymm6 | 
 | 	vpalignr	$8,%ymm10,%ymm10,%ymm10 | 
 | 	vpalignr	$4,%ymm14,%ymm14,%ymm14 | 
 | 	vpalignr	$12,%ymm5,%ymm5,%ymm5 | 
 | 	vpalignr	$8,%ymm9,%ymm9,%ymm9 | 
 | 	vpalignr	$4,%ymm13,%ymm13,%ymm13 | 
 | 	vpalignr	$12,%ymm4,%ymm4,%ymm4 | 
 | 	vpalignr	$8,%ymm8,%ymm8,%ymm8 | 
 | 	vpalignr	$4,%ymm12,%ymm12,%ymm12 | 
 |  | 
 | 	decq	%r10 | 
 | 	jnz	.Lseal_avx2_init_rounds | 
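// Rounds complete: add the saved initial state back in to obtain the
// keystream blocks.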
 | 	vpaddd	.Lchacha20_consts(%rip),%ymm3,%ymm3 | 
 | 	vpaddd	0+64(%rbp),%ymm7,%ymm7 | 
 | 	vpaddd	0+96(%rbp),%ymm11,%ymm11 | 
 | 	vpaddd	0+256(%rbp),%ymm15,%ymm15 | 
 | 	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2 | 
 | 	vpaddd	0+64(%rbp),%ymm6,%ymm6 | 
 | 	vpaddd	0+96(%rbp),%ymm10,%ymm10 | 
 | 	vpaddd	0+224(%rbp),%ymm14,%ymm14 | 
 | 	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1 | 
 | 	vpaddd	0+64(%rbp),%ymm5,%ymm5 | 
 | 	vpaddd	0+96(%rbp),%ymm9,%ymm9 | 
 | 	vpaddd	0+192(%rbp),%ymm13,%ymm13 | 
 | 	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0 | 
 | 	vpaddd	0+64(%rbp),%ymm4,%ymm4 | 
 | 	vpaddd	0+96(%rbp),%ymm8,%ymm8 | 
 | 	vpaddd	0+160(%rbp),%ymm12,%ymm12 | 
 |  | 
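// The first 32 bytes of keystream from the lowest-counter block become the
// one-time Poly1305 key: clamp r with .Lclamp and store (r,s) at 0(%rbp).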
 | 	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11 | 
 | 	vperm2i128	$0x02,%ymm3,%ymm7,%ymm15 | 
 | 	vperm2i128	$0x13,%ymm3,%ymm7,%ymm3 | 
 | 	vpand	.Lclamp(%rip),%ymm15,%ymm15 | 
 | 	vmovdqa	%ymm15,0+0(%rbp) | 
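// %rcx and %r8 still hold the AD pointer and length from function entry (the
// mov below is effectively a no-op); hash the additional data into the
// Poly1305 accumulator.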
 | 	movq	%r8,%r8 | 
 | 	call	poly_hash_ad_internal | 
 |  | 
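// Use the remaining keystream to encrypt the first 320 bytes of plaintext.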
 | 	vpxor	0(%rsi),%ymm3,%ymm3 | 
 | 	vpxor	32(%rsi),%ymm11,%ymm11 | 
 | 	vmovdqu	%ymm3,0(%rdi) | 
 | 	vmovdqu	%ymm11,32(%rdi) | 
 | 	vperm2i128	$0x02,%ymm2,%ymm6,%ymm15 | 
 | 	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6 | 
 | 	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2 | 
 | 	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10 | 
 | 	vpxor	0+64(%rsi),%ymm15,%ymm15 | 
 | 	vpxor	32+64(%rsi),%ymm2,%ymm2 | 
 | 	vpxor	64+64(%rsi),%ymm6,%ymm6 | 
 | 	vpxor	96+64(%rsi),%ymm10,%ymm10 | 
 | 	vmovdqu	%ymm15,0+64(%rdi) | 
 | 	vmovdqu	%ymm2,32+64(%rdi) | 
 | 	vmovdqu	%ymm6,64+64(%rdi) | 
 | 	vmovdqu	%ymm10,96+64(%rdi) | 
 | 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm15 | 
 | 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5 | 
 | 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1 | 
 | 	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9 | 
 | 	vpxor	0+192(%rsi),%ymm15,%ymm15 | 
 | 	vpxor	32+192(%rsi),%ymm1,%ymm1 | 
 | 	vpxor	64+192(%rsi),%ymm5,%ymm5 | 
 | 	vpxor	96+192(%rsi),%ymm9,%ymm9 | 
 | 	vmovdqu	%ymm15,0+192(%rdi) | 
 | 	vmovdqu	%ymm1,32+192(%rdi) | 
 | 	vmovdqu	%ymm5,64+192(%rdi) | 
 | 	vmovdqu	%ymm9,96+192(%rdi) | 
 | 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm15 | 
 | 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0 | 
 | 	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4 | 
 | 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12 | 
 | 	vmovdqa	%ymm15,%ymm8 | 
 |  | 
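// 320 bytes have been consumed. If at most 128 bytes remain, finish on the
// .Lseal_avx2_short_hash_remainder path; otherwise encrypt another 128 bytes
// with the keystream still in registers, then pick a tail routine or fall
// through to the next 512-byte batch depending on how much input is left.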
 | 	leaq	320(%rsi),%rsi | 
 | 	subq	$320,%rbx | 
 | 	movq	$320,%rcx | 
 | 	cmpq	$128,%rbx | 
 | 	jbe	.Lseal_avx2_short_hash_remainder | 
 | 	vpxor	0(%rsi),%ymm0,%ymm0 | 
 | 	vpxor	32(%rsi),%ymm4,%ymm4 | 
 | 	vpxor	64(%rsi),%ymm8,%ymm8 | 
 | 	vpxor	96(%rsi),%ymm12,%ymm12 | 
 | 	vmovdqu	%ymm0,320(%rdi) | 
 | 	vmovdqu	%ymm4,352(%rdi) | 
 | 	vmovdqu	%ymm8,384(%rdi) | 
 | 	vmovdqu	%ymm12,416(%rdi) | 
 | 	leaq	128(%rsi),%rsi | 
 | 	subq	$128,%rbx | 
 | 	movq	$8,%rcx | 
 | 	movq	$2,%r8 | 
 | 	cmpq	$128,%rbx | 
 | 	jbe	.Lseal_avx2_tail_128 | 
 | 	cmpq	$256,%rbx | 
 | 	jbe	.Lseal_avx2_tail_256 | 
 | 	cmpq	$384,%rbx | 
 | 	jbe	.Lseal_avx2_tail_384 | 
 | 	cmpq	$512,%rbx | 
 | 	jbe	.Lseal_avx2_tail_512 | 
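// More than 512 bytes remain: set up the next batch of eight ChaCha20 blocks.
// The first portion of the round function is run here without interleaved
// Poly1305 work; the code then jumps into the middle of the main round loop,
// which hashes the ciphertext already written while finishing the rounds.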
 | 	vmovdqa	.Lchacha20_consts(%rip),%ymm0 | 
 | 	vmovdqa	0+64(%rbp),%ymm4 | 
 | 	vmovdqa	0+96(%rbp),%ymm8 | 
 | 	vmovdqa	%ymm0,%ymm1 | 
 | 	vmovdqa	%ymm4,%ymm5 | 
 | 	vmovdqa	%ymm8,%ymm9 | 
 | 	vmovdqa	%ymm0,%ymm2 | 
 | 	vmovdqa	%ymm4,%ymm6 | 
 | 	vmovdqa	%ymm8,%ymm10 | 
 | 	vmovdqa	%ymm0,%ymm3 | 
 | 	vmovdqa	%ymm4,%ymm7 | 
 | 	vmovdqa	%ymm8,%ymm11 | 
 | 	vmovdqa	.Lavx2_inc(%rip),%ymm12 | 
 | 	vpaddd	0+160(%rbp),%ymm12,%ymm15 | 
 | 	vpaddd	%ymm15,%ymm12,%ymm14 | 
 | 	vpaddd	%ymm14,%ymm12,%ymm13 | 
 | 	vpaddd	%ymm13,%ymm12,%ymm12 | 
 | 	vmovdqa	%ymm15,0+256(%rbp) | 
 | 	vmovdqa	%ymm14,0+224(%rbp) | 
 | 	vmovdqa	%ymm13,0+192(%rbp) | 
 | 	vmovdqa	%ymm12,0+160(%rbp) | 
 | 	vmovdqa	%ymm8,0+128(%rbp) | 
 | 	vmovdqa	.Lrol16(%rip),%ymm8 | 
 | 	vpaddd	%ymm7,%ymm3,%ymm3 | 
 | 	vpaddd	%ymm6,%ymm2,%ymm2 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm3,%ymm15,%ymm15 | 
 | 	vpxor	%ymm2,%ymm14,%ymm14 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	%ymm8,%ymm15,%ymm15 | 
 | 	vpshufb	%ymm8,%ymm14,%ymm14 | 
 | 	vpshufb	%ymm8,%ymm13,%ymm13 | 
 | 	vpshufb	%ymm8,%ymm12,%ymm12 | 
 | 	vpaddd	%ymm15,%ymm11,%ymm11 | 
 | 	vpaddd	%ymm14,%ymm10,%ymm10 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpaddd	0+128(%rbp),%ymm12,%ymm8 | 
 | 	vpxor	%ymm11,%ymm7,%ymm7 | 
 | 	vpxor	%ymm10,%ymm6,%ymm6 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	%ymm8,0+128(%rbp) | 
 | 	vpsrld	$20,%ymm7,%ymm8 | 
 | 	vpslld	$32-20,%ymm7,%ymm7 | 
 | 	vpxor	%ymm8,%ymm7,%ymm7 | 
 | 	vpsrld	$20,%ymm6,%ymm8 | 
 | 	vpslld	$32-20,%ymm6,%ymm6 | 
 | 	vpxor	%ymm8,%ymm6,%ymm6 | 
 | 	vpsrld	$20,%ymm5,%ymm8 | 
 | 	vpslld	$32-20,%ymm5,%ymm5 | 
 | 	vpxor	%ymm8,%ymm5,%ymm5 | 
 | 	vpsrld	$20,%ymm4,%ymm8 | 
 | 	vpslld	$32-20,%ymm4,%ymm4 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	.Lrol8(%rip),%ymm8 | 
 | 	vpaddd	%ymm7,%ymm3,%ymm3 | 
 | 	vpaddd	%ymm6,%ymm2,%ymm2 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm3,%ymm15,%ymm15 | 
 | 	vpxor	%ymm2,%ymm14,%ymm14 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	%ymm8,%ymm15,%ymm15 | 
 | 	vpshufb	%ymm8,%ymm14,%ymm14 | 
 | 	vpshufb	%ymm8,%ymm13,%ymm13 | 
 | 	vpshufb	%ymm8,%ymm12,%ymm12 | 
 | 	vpaddd	%ymm15,%ymm11,%ymm11 | 
 | 	vpaddd	%ymm14,%ymm10,%ymm10 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpaddd	0+128(%rbp),%ymm12,%ymm8 | 
 | 	vpxor	%ymm11,%ymm7,%ymm7 | 
 | 	vpxor	%ymm10,%ymm6,%ymm6 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	%ymm8,0+128(%rbp) | 
 | 	vpsrld	$25,%ymm7,%ymm8 | 
 | 	vpslld	$32-25,%ymm7,%ymm7 | 
 | 	vpxor	%ymm8,%ymm7,%ymm7 | 
 | 	vpsrld	$25,%ymm6,%ymm8 | 
 | 	vpslld	$32-25,%ymm6,%ymm6 | 
 | 	vpxor	%ymm8,%ymm6,%ymm6 | 
 | 	vpsrld	$25,%ymm5,%ymm8 | 
 | 	vpslld	$32-25,%ymm5,%ymm5 | 
 | 	vpxor	%ymm8,%ymm5,%ymm5 | 
 | 	vpsrld	$25,%ymm4,%ymm8 | 
 | 	vpslld	$32-25,%ymm4,%ymm4 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	0+128(%rbp),%ymm8 | 
 | 	vpalignr	$4,%ymm7,%ymm7,%ymm7 | 
 | 	vpalignr	$8,%ymm11,%ymm11,%ymm11 | 
 | 	vpalignr	$12,%ymm15,%ymm15,%ymm15 | 
 | 	vpalignr	$4,%ymm6,%ymm6,%ymm6 | 
 | 	vpalignr	$8,%ymm10,%ymm10,%ymm10 | 
 | 	vpalignr	$12,%ymm14,%ymm14,%ymm14 | 
 | 	vpalignr	$4,%ymm5,%ymm5,%ymm5 | 
 | 	vpalignr	$8,%ymm9,%ymm9,%ymm9 | 
 | 	vpalignr	$12,%ymm13,%ymm13,%ymm13 | 
 | 	vpalignr	$4,%ymm4,%ymm4,%ymm4 | 
 | 	vpalignr	$8,%ymm8,%ymm8,%ymm8 | 
 | 	vpalignr	$12,%ymm12,%ymm12,%ymm12 | 
 | 	vmovdqa	%ymm8,0+128(%rbp) | 
 | 	vmovdqa	.Lrol16(%rip),%ymm8 | 
 | 	vpaddd	%ymm7,%ymm3,%ymm3 | 
 | 	vpaddd	%ymm6,%ymm2,%ymm2 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm3,%ymm15,%ymm15 | 
 | 	vpxor	%ymm2,%ymm14,%ymm14 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	%ymm8,%ymm15,%ymm15 | 
 | 	vpshufb	%ymm8,%ymm14,%ymm14 | 
 | 	vpshufb	%ymm8,%ymm13,%ymm13 | 
 | 	vpshufb	%ymm8,%ymm12,%ymm12 | 
 | 	vpaddd	%ymm15,%ymm11,%ymm11 | 
 | 	vpaddd	%ymm14,%ymm10,%ymm10 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpaddd	0+128(%rbp),%ymm12,%ymm8 | 
 | 	vpxor	%ymm11,%ymm7,%ymm7 | 
 | 	vpxor	%ymm10,%ymm6,%ymm6 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	%ymm8,0+128(%rbp) | 
 | 	vpsrld	$20,%ymm7,%ymm8 | 
 | 	vpslld	$32-20,%ymm7,%ymm7 | 
 | 	vpxor	%ymm8,%ymm7,%ymm7 | 
 | 	vpsrld	$20,%ymm6,%ymm8 | 
 | 	vpslld	$32-20,%ymm6,%ymm6 | 
 | 	vpxor	%ymm8,%ymm6,%ymm6 | 
 | 	vpsrld	$20,%ymm5,%ymm8 | 
 | 	vpslld	$32-20,%ymm5,%ymm5 | 
 | 	vpxor	%ymm8,%ymm5,%ymm5 | 
 | 	vpsrld	$20,%ymm4,%ymm8 | 
 | 	vpslld	$32-20,%ymm4,%ymm4 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	.Lrol8(%rip),%ymm8 | 
 | 	vpaddd	%ymm7,%ymm3,%ymm3 | 
 | 	vpaddd	%ymm6,%ymm2,%ymm2 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm3,%ymm15,%ymm15 | 
 | 	vpxor	%ymm2,%ymm14,%ymm14 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	%ymm8,%ymm15,%ymm15 | 
 | 	vpshufb	%ymm8,%ymm14,%ymm14 | 
 | 	vpshufb	%ymm8,%ymm13,%ymm13 | 
 | 	vpshufb	%ymm8,%ymm12,%ymm12 | 
 | 	vpaddd	%ymm15,%ymm11,%ymm11 | 
 | 	vpaddd	%ymm14,%ymm10,%ymm10 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpaddd	0+128(%rbp),%ymm12,%ymm8 | 
 | 	vpxor	%ymm11,%ymm7,%ymm7 | 
 | 	vpxor	%ymm10,%ymm6,%ymm6 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	%ymm8,0+128(%rbp) | 
 | 	vpsrld	$25,%ymm7,%ymm8 | 
 | 	vpslld	$32-25,%ymm7,%ymm7 | 
 | 	vpxor	%ymm8,%ymm7,%ymm7 | 
 | 	vpsrld	$25,%ymm6,%ymm8 | 
 | 	vpslld	$32-25,%ymm6,%ymm6 | 
 | 	vpxor	%ymm8,%ymm6,%ymm6 | 
 | 	vpsrld	$25,%ymm5,%ymm8 | 
 | 	vpslld	$32-25,%ymm5,%ymm5 | 
 | 	vpxor	%ymm8,%ymm5,%ymm5 | 
 | 	vpsrld	$25,%ymm4,%ymm8 | 
 | 	vpslld	$32-25,%ymm4,%ymm4 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	0+128(%rbp),%ymm8 | 
 | 	vpalignr	$12,%ymm7,%ymm7,%ymm7 | 
 | 	vpalignr	$8,%ymm11,%ymm11,%ymm11 | 
 | 	vpalignr	$4,%ymm15,%ymm15,%ymm15 | 
 | 	vpalignr	$12,%ymm6,%ymm6,%ymm6 | 
 | 	vpalignr	$8,%ymm10,%ymm10,%ymm10 | 
 | 	vpalignr	$4,%ymm14,%ymm14,%ymm14 | 
 | 	vpalignr	$12,%ymm5,%ymm5,%ymm5 | 
 | 	vpalignr	$8,%ymm9,%ymm9,%ymm9 | 
 | 	vpalignr	$4,%ymm13,%ymm13,%ymm13 | 
 | 	vpalignr	$12,%ymm4,%ymm4,%ymm4 | 
 | 	vpalignr	$8,%ymm8,%ymm8,%ymm8 | 
 | 	vpalignr	$4,%ymm12,%ymm12,%ymm12 | 
 | 	vmovdqa	%ymm8,0+128(%rbp) | 
 | 	vmovdqa	.Lrol16(%rip),%ymm8 | 
 | 	vpaddd	%ymm7,%ymm3,%ymm3 | 
 | 	vpaddd	%ymm6,%ymm2,%ymm2 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm3,%ymm15,%ymm15 | 
 | 	vpxor	%ymm2,%ymm14,%ymm14 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	%ymm8,%ymm15,%ymm15 | 
 | 	vpshufb	%ymm8,%ymm14,%ymm14 | 
 | 	vpshufb	%ymm8,%ymm13,%ymm13 | 
 | 	vpshufb	%ymm8,%ymm12,%ymm12 | 
 | 	vpaddd	%ymm15,%ymm11,%ymm11 | 
 | 	vpaddd	%ymm14,%ymm10,%ymm10 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpaddd	0+128(%rbp),%ymm12,%ymm8 | 
 | 	vpxor	%ymm11,%ymm7,%ymm7 | 
 | 	vpxor	%ymm10,%ymm6,%ymm6 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	%ymm8,0+128(%rbp) | 
 | 	vpsrld	$20,%ymm7,%ymm8 | 
 | 	vpslld	$32-20,%ymm7,%ymm7 | 
 | 	vpxor	%ymm8,%ymm7,%ymm7 | 
 | 	vpsrld	$20,%ymm6,%ymm8 | 
 | 	vpslld	$32-20,%ymm6,%ymm6 | 
 | 	vpxor	%ymm8,%ymm6,%ymm6 | 
 | 	vpsrld	$20,%ymm5,%ymm8 | 
 | 	vpslld	$32-20,%ymm5,%ymm5 | 
 | 	vpxor	%ymm8,%ymm5,%ymm5 | 
 | 	vpsrld	$20,%ymm4,%ymm8 | 
 | 	vpslld	$32-20,%ymm4,%ymm4 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	.Lrol8(%rip),%ymm8 | 
 | 	vpaddd	%ymm7,%ymm3,%ymm3 | 
 | 	vpaddd	%ymm6,%ymm2,%ymm2 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm3,%ymm15,%ymm15 | 
 |  | 
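// Join the interleaved loop mid-iteration: nine further iterations are needed,
// and %rdi is backed up by 16 so the hashing inside the loop entry picks up
// the ciphertext stream at the right offset.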
 | 	subq	$16,%rdi | 
 | 	movq	$9,%rcx | 
 | 	jmp	.Lseal_avx2_main_loop_rounds_entry | 
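// Main seal loop: each pass computes 512 bytes of keystream while the scalar
// code, interleaved with the vector instructions, absorbs 48 bytes of
// already-written ciphertext into the Poly1305 state per round iteration; the
// remaining 32 bytes of each 512-byte chunk are absorbed right after the
// rounds finish.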
 | .align	32 | 
 | .Lseal_avx2_main_loop: | 
 | 	vmovdqa	.Lchacha20_consts(%rip),%ymm0 | 
 | 	vmovdqa	0+64(%rbp),%ymm4 | 
 | 	vmovdqa	0+96(%rbp),%ymm8 | 
 | 	vmovdqa	%ymm0,%ymm1 | 
 | 	vmovdqa	%ymm4,%ymm5 | 
 | 	vmovdqa	%ymm8,%ymm9 | 
 | 	vmovdqa	%ymm0,%ymm2 | 
 | 	vmovdqa	%ymm4,%ymm6 | 
 | 	vmovdqa	%ymm8,%ymm10 | 
 | 	vmovdqa	%ymm0,%ymm3 | 
 | 	vmovdqa	%ymm4,%ymm7 | 
 | 	vmovdqa	%ymm8,%ymm11 | 
 | 	vmovdqa	.Lavx2_inc(%rip),%ymm12 | 
 | 	vpaddd	0+160(%rbp),%ymm12,%ymm15 | 
 | 	vpaddd	%ymm15,%ymm12,%ymm14 | 
 | 	vpaddd	%ymm14,%ymm12,%ymm13 | 
 | 	vpaddd	%ymm13,%ymm12,%ymm12 | 
 | 	vmovdqa	%ymm15,0+256(%rbp) | 
 | 	vmovdqa	%ymm14,0+224(%rbp) | 
 | 	vmovdqa	%ymm13,0+192(%rbp) | 
 | 	vmovdqa	%ymm12,0+160(%rbp) | 
 |  | 
 | 	movq	$10,%rcx | 
 | .align	32 | 
 | .Lseal_avx2_main_loop_rounds: | 
 | 	addq	0+0(%rdi),%r10 | 
 | 	adcq	8+0(%rdi),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	vmovdqa	%ymm8,0+128(%rbp) | 
 | 	vmovdqa	.Lrol16(%rip),%ymm8 | 
 | 	vpaddd	%ymm7,%ymm3,%ymm3 | 
 | 	vpaddd	%ymm6,%ymm2,%ymm2 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm3,%ymm15,%ymm15 | 
 | 	vpxor	%ymm2,%ymm14,%ymm14 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	movq	0+0+0(%rbp),%rdx | 
 | 	movq	%rdx,%r15 | 
 | 	mulxq	%r10,%r13,%r14 | 
 | 	mulxq	%r11,%rax,%rdx | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	vpshufb	%ymm8,%ymm15,%ymm15 | 
 | 	vpshufb	%ymm8,%ymm14,%ymm14 | 
 | 	vpshufb	%ymm8,%ymm13,%ymm13 | 
 | 	vpshufb	%ymm8,%ymm12,%ymm12 | 
 | 	vpaddd	%ymm15,%ymm11,%ymm11 | 
 | 	vpaddd	%ymm14,%ymm10,%ymm10 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpaddd	0+128(%rbp),%ymm12,%ymm8 | 
 | 	vpxor	%ymm11,%ymm7,%ymm7 | 
 | 	movq	8+0+0(%rbp),%rdx | 
 | 	mulxq	%r10,%r10,%rax | 
 | 	addq	%r10,%r14 | 
 | 	mulxq	%r11,%r11,%r9 | 
 | 	adcq	%r11,%r15 | 
 | 	adcq	$0,%r9 | 
 | 	imulq	%r12,%rdx | 
 | 	vpxor	%ymm10,%ymm6,%ymm6 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	%ymm8,0+128(%rbp) | 
 | 	vpsrld	$20,%ymm7,%ymm8 | 
 | 	vpslld	$32-20,%ymm7,%ymm7 | 
 | 	vpxor	%ymm8,%ymm7,%ymm7 | 
 | 	vpsrld	$20,%ymm6,%ymm8 | 
 | 	vpslld	$32-20,%ymm6,%ymm6 | 
 | 	vpxor	%ymm8,%ymm6,%ymm6 | 
 | 	vpsrld	$20,%ymm5,%ymm8 | 
 | 	vpslld	$32-20,%ymm5,%ymm5 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	vpxor	%ymm8,%ymm5,%ymm5 | 
 | 	vpsrld	$20,%ymm4,%ymm8 | 
 | 	vpslld	$32-20,%ymm4,%ymm4 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	.Lrol8(%rip),%ymm8 | 
 | 	vpaddd	%ymm7,%ymm3,%ymm3 | 
 | 	vpaddd	%ymm6,%ymm2,%ymm2 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm3,%ymm15,%ymm15 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 |  | 
 | .Lseal_avx2_main_loop_rounds_entry: | 
 | 	vpxor	%ymm2,%ymm14,%ymm14 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	%ymm8,%ymm15,%ymm15 | 
 | 	vpshufb	%ymm8,%ymm14,%ymm14 | 
 | 	vpshufb	%ymm8,%ymm13,%ymm13 | 
 | 	vpshufb	%ymm8,%ymm12,%ymm12 | 
 | 	vpaddd	%ymm15,%ymm11,%ymm11 | 
 | 	vpaddd	%ymm14,%ymm10,%ymm10 | 
 | 	addq	0+16(%rdi),%r10 | 
 | 	adcq	8+16(%rdi),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpaddd	0+128(%rbp),%ymm12,%ymm8 | 
 | 	vpxor	%ymm11,%ymm7,%ymm7 | 
 | 	vpxor	%ymm10,%ymm6,%ymm6 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	%ymm8,0+128(%rbp) | 
 | 	vpsrld	$25,%ymm7,%ymm8 | 
 | 	movq	0+0+0(%rbp),%rdx | 
 | 	movq	%rdx,%r15 | 
 | 	mulxq	%r10,%r13,%r14 | 
 | 	mulxq	%r11,%rax,%rdx | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	vpslld	$32-25,%ymm7,%ymm7 | 
 | 	vpxor	%ymm8,%ymm7,%ymm7 | 
 | 	vpsrld	$25,%ymm6,%ymm8 | 
 | 	vpslld	$32-25,%ymm6,%ymm6 | 
 | 	vpxor	%ymm8,%ymm6,%ymm6 | 
 | 	vpsrld	$25,%ymm5,%ymm8 | 
 | 	vpslld	$32-25,%ymm5,%ymm5 | 
 | 	vpxor	%ymm8,%ymm5,%ymm5 | 
 | 	vpsrld	$25,%ymm4,%ymm8 | 
 | 	vpslld	$32-25,%ymm4,%ymm4 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	0+128(%rbp),%ymm8 | 
 | 	vpalignr	$4,%ymm7,%ymm7,%ymm7 | 
 | 	vpalignr	$8,%ymm11,%ymm11,%ymm11 | 
 | 	vpalignr	$12,%ymm15,%ymm15,%ymm15 | 
 | 	vpalignr	$4,%ymm6,%ymm6,%ymm6 | 
 | 	vpalignr	$8,%ymm10,%ymm10,%ymm10 | 
 | 	vpalignr	$12,%ymm14,%ymm14,%ymm14 | 
 | 	movq	8+0+0(%rbp),%rdx | 
 | 	mulxq	%r10,%r10,%rax | 
 | 	addq	%r10,%r14 | 
 | 	mulxq	%r11,%r11,%r9 | 
 | 	adcq	%r11,%r15 | 
 | 	adcq	$0,%r9 | 
 | 	imulq	%r12,%rdx | 
 | 	vpalignr	$4,%ymm5,%ymm5,%ymm5 | 
 | 	vpalignr	$8,%ymm9,%ymm9,%ymm9 | 
 | 	vpalignr	$12,%ymm13,%ymm13,%ymm13 | 
 | 	vpalignr	$4,%ymm4,%ymm4,%ymm4 | 
 | 	vpalignr	$8,%ymm8,%ymm8,%ymm8 | 
 | 	vpalignr	$12,%ymm12,%ymm12,%ymm12 | 
 | 	vmovdqa	%ymm8,0+128(%rbp) | 
 | 	vmovdqa	.Lrol16(%rip),%ymm8 | 
 | 	vpaddd	%ymm7,%ymm3,%ymm3 | 
 | 	vpaddd	%ymm6,%ymm2,%ymm2 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm3,%ymm15,%ymm15 | 
 | 	vpxor	%ymm2,%ymm14,%ymm14 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	%ymm8,%ymm15,%ymm15 | 
 | 	vpshufb	%ymm8,%ymm14,%ymm14 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	vpshufb	%ymm8,%ymm13,%ymm13 | 
 | 	vpshufb	%ymm8,%ymm12,%ymm12 | 
 | 	vpaddd	%ymm15,%ymm11,%ymm11 | 
 | 	vpaddd	%ymm14,%ymm10,%ymm10 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpaddd	0+128(%rbp),%ymm12,%ymm8 | 
 | 	vpxor	%ymm11,%ymm7,%ymm7 | 
 | 	vpxor	%ymm10,%ymm6,%ymm6 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	%ymm8,0+128(%rbp) | 
 | 	vpsrld	$20,%ymm7,%ymm8 | 
 | 	vpslld	$32-20,%ymm7,%ymm7 | 
 | 	vpxor	%ymm8,%ymm7,%ymm7 | 
 | 	vpsrld	$20,%ymm6,%ymm8 | 
 | 	vpslld	$32-20,%ymm6,%ymm6 | 
 | 	vpxor	%ymm8,%ymm6,%ymm6 | 
 | 	addq	0+32(%rdi),%r10 | 
 | 	adcq	8+32(%rdi),%r11 | 
 | 	adcq	$1,%r12 | 
 |  | 
 | 	leaq	48(%rdi),%rdi | 
 | 	vpsrld	$20,%ymm5,%ymm8 | 
 | 	vpslld	$32-20,%ymm5,%ymm5 | 
 | 	vpxor	%ymm8,%ymm5,%ymm5 | 
 | 	vpsrld	$20,%ymm4,%ymm8 | 
 | 	vpslld	$32-20,%ymm4,%ymm4 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	.Lrol8(%rip),%ymm8 | 
 | 	vpaddd	%ymm7,%ymm3,%ymm3 | 
 | 	vpaddd	%ymm6,%ymm2,%ymm2 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm3,%ymm15,%ymm15 | 
 | 	vpxor	%ymm2,%ymm14,%ymm14 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	%ymm8,%ymm15,%ymm15 | 
 | 	vpshufb	%ymm8,%ymm14,%ymm14 | 
 | 	vpshufb	%ymm8,%ymm13,%ymm13 | 
 | 	movq	0+0+0(%rbp),%rdx | 
 | 	movq	%rdx,%r15 | 
 | 	mulxq	%r10,%r13,%r14 | 
 | 	mulxq	%r11,%rax,%rdx | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	vpshufb	%ymm8,%ymm12,%ymm12 | 
 | 	vpaddd	%ymm15,%ymm11,%ymm11 | 
 | 	vpaddd	%ymm14,%ymm10,%ymm10 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpaddd	0+128(%rbp),%ymm12,%ymm8 | 
 | 	vpxor	%ymm11,%ymm7,%ymm7 | 
 | 	vpxor	%ymm10,%ymm6,%ymm6 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	movq	8+0+0(%rbp),%rdx | 
 | 	mulxq	%r10,%r10,%rax | 
 | 	addq	%r10,%r14 | 
 | 	mulxq	%r11,%r11,%r9 | 
 | 	adcq	%r11,%r15 | 
 | 	adcq	$0,%r9 | 
 | 	imulq	%r12,%rdx | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	%ymm8,0+128(%rbp) | 
 | 	vpsrld	$25,%ymm7,%ymm8 | 
 | 	vpslld	$32-25,%ymm7,%ymm7 | 
 | 	vpxor	%ymm8,%ymm7,%ymm7 | 
 | 	vpsrld	$25,%ymm6,%ymm8 | 
 | 	vpslld	$32-25,%ymm6,%ymm6 | 
 | 	vpxor	%ymm8,%ymm6,%ymm6 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	vpsrld	$25,%ymm5,%ymm8 | 
 | 	vpslld	$32-25,%ymm5,%ymm5 | 
 | 	vpxor	%ymm8,%ymm5,%ymm5 | 
 | 	vpsrld	$25,%ymm4,%ymm8 | 
 | 	vpslld	$32-25,%ymm4,%ymm4 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	0+128(%rbp),%ymm8 | 
 | 	vpalignr	$12,%ymm7,%ymm7,%ymm7 | 
 | 	vpalignr	$8,%ymm11,%ymm11,%ymm11 | 
 | 	vpalignr	$4,%ymm15,%ymm15,%ymm15 | 
 | 	vpalignr	$12,%ymm6,%ymm6,%ymm6 | 
 | 	vpalignr	$8,%ymm10,%ymm10,%ymm10 | 
 | 	vpalignr	$4,%ymm14,%ymm14,%ymm14 | 
 | 	vpalignr	$12,%ymm5,%ymm5,%ymm5 | 
 | 	vpalignr	$8,%ymm9,%ymm9,%ymm9 | 
 | 	vpalignr	$4,%ymm13,%ymm13,%ymm13 | 
 | 	vpalignr	$12,%ymm4,%ymm4,%ymm4 | 
 | 	vpalignr	$8,%ymm8,%ymm8,%ymm8 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 | 	vpalignr	$4,%ymm12,%ymm12,%ymm12 | 
 |  | 
 | 	decq	%rcx | 
 | 	jne	.Lseal_avx2_main_loop_rounds | 
 | 	vpaddd	.Lchacha20_consts(%rip),%ymm3,%ymm3 | 
 | 	vpaddd	0+64(%rbp),%ymm7,%ymm7 | 
 | 	vpaddd	0+96(%rbp),%ymm11,%ymm11 | 
 | 	vpaddd	0+256(%rbp),%ymm15,%ymm15 | 
 | 	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2 | 
 | 	vpaddd	0+64(%rbp),%ymm6,%ymm6 | 
 | 	vpaddd	0+96(%rbp),%ymm10,%ymm10 | 
 | 	vpaddd	0+224(%rbp),%ymm14,%ymm14 | 
 | 	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1 | 
 | 	vpaddd	0+64(%rbp),%ymm5,%ymm5 | 
 | 	vpaddd	0+96(%rbp),%ymm9,%ymm9 | 
 | 	vpaddd	0+192(%rbp),%ymm13,%ymm13 | 
 | 	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0 | 
 | 	vpaddd	0+64(%rbp),%ymm4,%ymm4 | 
 | 	vpaddd	0+96(%rbp),%ymm8,%ymm8 | 
 | 	vpaddd	0+160(%rbp),%ymm12,%ymm12 | 
 |  | 
 | 	vmovdqa	%ymm0,0+128(%rbp) | 
 | 	addq	0+0(%rdi),%r10 | 
 | 	adcq	8+0(%rdi),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rdx | 
 | 	movq	%rdx,%r15 | 
 | 	mulxq	%r10,%r13,%r14 | 
 | 	mulxq	%r11,%rax,%rdx | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rdx | 
 | 	mulxq	%r10,%r10,%rax | 
 | 	addq	%r10,%r14 | 
 | 	mulxq	%r11,%r11,%r9 | 
 | 	adcq	%r11,%r15 | 
 | 	adcq	$0,%r9 | 
 | 	imulq	%r12,%rdx | 
 | 	addq	%rax,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 | 	addq	0+16(%rdi),%r10 | 
 | 	adcq	8+16(%rdi),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rdx | 
 | 	movq	%rdx,%r15 | 
 | 	mulxq	%r10,%r13,%r14 | 
 | 	mulxq	%r11,%rax,%rdx | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rdx | 
 | 	mulxq	%r10,%r10,%rax | 
 | 	addq	%r10,%r14 | 
 | 	mulxq	%r11,%r11,%r9 | 
 | 	adcq	%r11,%r15 | 
 | 	adcq	$0,%r9 | 
 | 	imulq	%r12,%rdx | 
 | 	addq	%rax,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 |  | 
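// The Poly1305 accumulator has caught up on another 512 bytes of ciphertext;
// now XOR the next 512 bytes of plaintext with the keystream and write them out.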
 | 	leaq	32(%rdi),%rdi | 
 | 	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0 | 
 | 	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7 | 
 | 	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3 | 
 | 	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11 | 
 | 	vpxor	0+0(%rsi),%ymm0,%ymm0 | 
 | 	vpxor	32+0(%rsi),%ymm3,%ymm3 | 
 | 	vpxor	64+0(%rsi),%ymm7,%ymm7 | 
 | 	vpxor	96+0(%rsi),%ymm11,%ymm11 | 
 | 	vmovdqu	%ymm0,0+0(%rdi) | 
 | 	vmovdqu	%ymm3,32+0(%rdi) | 
 | 	vmovdqu	%ymm7,64+0(%rdi) | 
 | 	vmovdqu	%ymm11,96+0(%rdi) | 
 |  | 
 | 	vmovdqa	0+128(%rbp),%ymm0 | 
 | 	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3 | 
 | 	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6 | 
 | 	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2 | 
 | 	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10 | 
 | 	vpxor	0+128(%rsi),%ymm3,%ymm3 | 
 | 	vpxor	32+128(%rsi),%ymm2,%ymm2 | 
 | 	vpxor	64+128(%rsi),%ymm6,%ymm6 | 
 | 	vpxor	96+128(%rsi),%ymm10,%ymm10 | 
 | 	vmovdqu	%ymm3,0+128(%rdi) | 
 | 	vmovdqu	%ymm2,32+128(%rdi) | 
 | 	vmovdqu	%ymm6,64+128(%rdi) | 
 | 	vmovdqu	%ymm10,96+128(%rdi) | 
 | 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3 | 
 | 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5 | 
 | 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1 | 
 | 	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9 | 
 | 	vpxor	0+256(%rsi),%ymm3,%ymm3 | 
 | 	vpxor	32+256(%rsi),%ymm1,%ymm1 | 
 | 	vpxor	64+256(%rsi),%ymm5,%ymm5 | 
 | 	vpxor	96+256(%rsi),%ymm9,%ymm9 | 
 | 	vmovdqu	%ymm3,0+256(%rdi) | 
 | 	vmovdqu	%ymm1,32+256(%rdi) | 
 | 	vmovdqu	%ymm5,64+256(%rdi) | 
 | 	vmovdqu	%ymm9,96+256(%rdi) | 
 | 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3 | 
 | 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm4 | 
 | 	vperm2i128	$0x02,%ymm8,%ymm12,%ymm0 | 
 | 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm8 | 
 | 	vpxor	0+384(%rsi),%ymm3,%ymm3 | 
 | 	vpxor	32+384(%rsi),%ymm0,%ymm0 | 
 | 	vpxor	64+384(%rsi),%ymm4,%ymm4 | 
 | 	vpxor	96+384(%rsi),%ymm8,%ymm8 | 
 | 	vmovdqu	%ymm3,0+384(%rdi) | 
 | 	vmovdqu	%ymm0,32+384(%rdi) | 
 | 	vmovdqu	%ymm4,64+384(%rdi) | 
 | 	vmovdqu	%ymm8,96+384(%rdi) | 
 |  | 
 | 	leaq	512(%rsi),%rsi | 
 | 	subq	$512,%rbx | 
 | 	cmpq	$512,%rbx | 
 | 	jg	.Lseal_avx2_main_loop | 
 |  | 
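// At most 512 bytes remain. Hash the final 32 bytes of ciphertext from the
// last full chunk, then branch to a tail routine sized for what is left;
// %rcx and %r8 seed the tail's round/hash loop counters.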
 | 	addq	0+0(%rdi),%r10 | 
 | 	adcq	8+0(%rdi),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rdx | 
 | 	movq	%rdx,%r15 | 
 | 	mulxq	%r10,%r13,%r14 | 
 | 	mulxq	%r11,%rax,%rdx | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rdx | 
 | 	mulxq	%r10,%r10,%rax | 
 | 	addq	%r10,%r14 | 
 | 	mulxq	%r11,%r11,%r9 | 
 | 	adcq	%r11,%r15 | 
 | 	adcq	$0,%r9 | 
 | 	imulq	%r12,%rdx | 
 | 	addq	%rax,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 | 	addq	0+16(%rdi),%r10 | 
 | 	adcq	8+16(%rdi),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rdx | 
 | 	movq	%rdx,%r15 | 
 | 	mulxq	%r10,%r13,%r14 | 
 | 	mulxq	%r11,%rax,%rdx | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rdx | 
 | 	mulxq	%r10,%r10,%rax | 
 | 	addq	%r10,%r14 | 
 | 	mulxq	%r11,%r11,%r9 | 
 | 	adcq	%r11,%r15 | 
 | 	adcq	$0,%r9 | 
 | 	imulq	%r12,%rdx | 
 | 	addq	%rax,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 |  | 
 | 	leaq	32(%rdi),%rdi | 
 | 	movq	$10,%rcx | 
 | 	xorq	%r8,%r8 | 
 |  | 
 | 	cmpq	$384,%rbx | 
 | 	ja	.Lseal_avx2_tail_512 | 
 | 	cmpq	$256,%rbx | 
 | 	ja	.Lseal_avx2_tail_384 | 
 | 	cmpq	$128,%rbx | 
 | 	ja	.Lseal_avx2_tail_256 | 
 |  | 
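// Tail for at most 128 remaining bytes: a single two-block ChaCha20 state.
// The %rcx-counted passes hash three 16-byte ciphertext blocks per double
// round; the %r8-counted passes hash two, so the outstanding ciphertext is
// absorbed while the remaining double rounds run.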
 | .Lseal_avx2_tail_128: | 
 | 	vmovdqa	.Lchacha20_consts(%rip),%ymm0 | 
 | 	vmovdqa	0+64(%rbp),%ymm4 | 
 | 	vmovdqa	0+96(%rbp),%ymm8 | 
 | 	vmovdqa	.Lavx2_inc(%rip),%ymm12 | 
 | 	vpaddd	0+160(%rbp),%ymm12,%ymm12 | 
 | 	vmovdqa	%ymm12,0+160(%rbp) | 
 |  | 
 | .Lseal_avx2_tail_128_rounds_and_3xhash: | 
 | 	addq	0+0(%rdi),%r10 | 
 | 	adcq	8+0(%rdi),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rdx | 
 | 	movq	%rdx,%r15 | 
 | 	mulxq	%r10,%r13,%r14 | 
 | 	mulxq	%r11,%rax,%rdx | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rdx | 
 | 	mulxq	%r10,%r10,%rax | 
 | 	addq	%r10,%r14 | 
 | 	mulxq	%r11,%r11,%r9 | 
 | 	adcq	%r11,%r15 | 
 | 	adcq	$0,%r9 | 
 | 	imulq	%r12,%rdx | 
 | 	addq	%rax,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 |  | 
 | 	leaq	16(%rdi),%rdi | 
 | .Lseal_avx2_tail_128_rounds_and_2xhash: | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	.Lrol16(%rip),%ymm12,%ymm12 | 
 | 	vpaddd	%ymm12,%ymm8,%ymm8 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vpsrld	$20,%ymm4,%ymm3 | 
 | 	vpslld	$12,%ymm4,%ymm4 | 
 | 	vpxor	%ymm3,%ymm4,%ymm4 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	.Lrol8(%rip),%ymm12,%ymm12 | 
 | 	vpaddd	%ymm12,%ymm8,%ymm8 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vpslld	$7,%ymm4,%ymm3 | 
 | 	vpsrld	$25,%ymm4,%ymm4 | 
 | 	vpxor	%ymm3,%ymm4,%ymm4 | 
 | 	vpalignr	$12,%ymm12,%ymm12,%ymm12 | 
 | 	vpalignr	$8,%ymm8,%ymm8,%ymm8 | 
 | 	vpalignr	$4,%ymm4,%ymm4,%ymm4 | 
 | 	addq	0+0(%rdi),%r10 | 
 | 	adcq	8+0(%rdi),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rdx | 
 | 	movq	%rdx,%r15 | 
 | 	mulxq	%r10,%r13,%r14 | 
 | 	mulxq	%r11,%rax,%rdx | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rdx | 
 | 	mulxq	%r10,%r10,%rax | 
 | 	addq	%r10,%r14 | 
 | 	mulxq	%r11,%r11,%r9 | 
 | 	adcq	%r11,%r15 | 
 | 	adcq	$0,%r9 | 
 | 	imulq	%r12,%rdx | 
 | 	addq	%rax,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	.Lrol16(%rip),%ymm12,%ymm12 | 
 | 	vpaddd	%ymm12,%ymm8,%ymm8 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vpsrld	$20,%ymm4,%ymm3 | 
 | 	vpslld	$12,%ymm4,%ymm4 | 
 | 	vpxor	%ymm3,%ymm4,%ymm4 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	.Lrol8(%rip),%ymm12,%ymm12 | 
 | 	vpaddd	%ymm12,%ymm8,%ymm8 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vpslld	$7,%ymm4,%ymm3 | 
 | 	vpsrld	$25,%ymm4,%ymm4 | 
 | 	vpxor	%ymm3,%ymm4,%ymm4 | 
 | 	vpalignr	$4,%ymm12,%ymm12,%ymm12 | 
 | 	vpalignr	$8,%ymm8,%ymm8,%ymm8 | 
 | 	vpalignr	$12,%ymm4,%ymm4,%ymm4 | 
 | 	addq	0+16(%rdi),%r10 | 
 | 	adcq	8+16(%rdi),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rdx | 
 | 	movq	%rdx,%r15 | 
 | 	mulxq	%r10,%r13,%r14 | 
 | 	mulxq	%r11,%rax,%rdx | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rdx | 
 | 	mulxq	%r10,%r10,%rax | 
 | 	addq	%r10,%r14 | 
 | 	mulxq	%r11,%r11,%r9 | 
 | 	adcq	%r11,%r15 | 
 | 	adcq	$0,%r9 | 
 | 	imulq	%r12,%rdx | 
 | 	addq	%rax,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 |  | 
 | 	leaq	32(%rdi),%rdi | 
 | 	decq	%rcx | 
 | 	jg	.Lseal_avx2_tail_128_rounds_and_3xhash | 
 | 	decq	%r8 | 
 | 	jge	.Lseal_avx2_tail_128_rounds_and_2xhash | 
 | 	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0 | 
 | 	vpaddd	0+64(%rbp),%ymm4,%ymm4 | 
 | 	vpaddd	0+96(%rbp),%ymm8,%ymm8 | 
 | 	vpaddd	0+160(%rbp),%ymm12,%ymm12 | 
 | 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3 | 
 | 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0 | 
 | 	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4 | 
 | 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12 | 
 | 	vmovdqa	%ymm3,%ymm8 | 
 |  | 
 | 	jmp	.Lseal_avx2_short_loop | 
 |  | 
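// Tail for 129-256 remaining bytes: two two-block ChaCha20 states, rounds
// interleaved with Poly1305 over the outstanding ciphertext.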
 | .Lseal_avx2_tail_256: | 
 | 	vmovdqa	.Lchacha20_consts(%rip),%ymm0 | 
 | 	vmovdqa	0+64(%rbp),%ymm4 | 
 | 	vmovdqa	0+96(%rbp),%ymm8 | 
 | 	vmovdqa	%ymm0,%ymm1 | 
 | 	vmovdqa	%ymm4,%ymm5 | 
 | 	vmovdqa	%ymm8,%ymm9 | 
 | 	vmovdqa	.Lavx2_inc(%rip),%ymm12 | 
 | 	vpaddd	0+160(%rbp),%ymm12,%ymm13 | 
 | 	vpaddd	%ymm13,%ymm12,%ymm12 | 
 | 	vmovdqa	%ymm12,0+160(%rbp) | 
 | 	vmovdqa	%ymm13,0+192(%rbp) | 
 |  | 
 | .Lseal_avx2_tail_256_rounds_and_3xhash: | 
 | 	addq	0+0(%rdi),%r10 | 
 | 	adcq	8+0(%rdi),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 |  | 
 | 	leaq	16(%rdi),%rdi | 
 | .Lseal_avx2_tail_256_rounds_and_2xhash: | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	.Lrol16(%rip),%ymm12,%ymm12 | 
 | 	vpaddd	%ymm12,%ymm8,%ymm8 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vpsrld	$20,%ymm4,%ymm3 | 
 | 	vpslld	$12,%ymm4,%ymm4 | 
 | 	vpxor	%ymm3,%ymm4,%ymm4 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	.Lrol8(%rip),%ymm12,%ymm12 | 
 | 	vpaddd	%ymm12,%ymm8,%ymm8 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vpslld	$7,%ymm4,%ymm3 | 
 | 	vpsrld	$25,%ymm4,%ymm4 | 
 | 	vpxor	%ymm3,%ymm4,%ymm4 | 
 | 	vpalignr	$12,%ymm12,%ymm12,%ymm12 | 
 | 	vpalignr	$8,%ymm8,%ymm8,%ymm8 | 
 | 	vpalignr	$4,%ymm4,%ymm4,%ymm4 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpshufb	.Lrol16(%rip),%ymm13,%ymm13 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpsrld	$20,%ymm5,%ymm3 | 
 | 	vpslld	$12,%ymm5,%ymm5 | 
 | 	vpxor	%ymm3,%ymm5,%ymm5 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpshufb	.Lrol8(%rip),%ymm13,%ymm13 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpslld	$7,%ymm5,%ymm3 | 
 | 	vpsrld	$25,%ymm5,%ymm5 | 
 | 	vpxor	%ymm3,%ymm5,%ymm5 | 
 | 	vpalignr	$12,%ymm13,%ymm13,%ymm13 | 
 | 	vpalignr	$8,%ymm9,%ymm9,%ymm9 | 
 | 	vpalignr	$4,%ymm5,%ymm5,%ymm5 | 
 | 	addq	0+0(%rdi),%r10 | 
 | 	adcq	8+0(%rdi),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	.Lrol16(%rip),%ymm12,%ymm12 | 
 | 	vpaddd	%ymm12,%ymm8,%ymm8 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vpsrld	$20,%ymm4,%ymm3 | 
 | 	vpslld	$12,%ymm4,%ymm4 | 
 | 	vpxor	%ymm3,%ymm4,%ymm4 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	.Lrol8(%rip),%ymm12,%ymm12 | 
 | 	vpaddd	%ymm12,%ymm8,%ymm8 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vpslld	$7,%ymm4,%ymm3 | 
 | 	vpsrld	$25,%ymm4,%ymm4 | 
 | 	vpxor	%ymm3,%ymm4,%ymm4 | 
 | 	vpalignr	$4,%ymm12,%ymm12,%ymm12 | 
 | 	vpalignr	$8,%ymm8,%ymm8,%ymm8 | 
 | 	vpalignr	$12,%ymm4,%ymm4,%ymm4 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpshufb	.Lrol16(%rip),%ymm13,%ymm13 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpsrld	$20,%ymm5,%ymm3 | 
 | 	vpslld	$12,%ymm5,%ymm5 | 
 | 	vpxor	%ymm3,%ymm5,%ymm5 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpshufb	.Lrol8(%rip),%ymm13,%ymm13 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpslld	$7,%ymm5,%ymm3 | 
 | 	vpsrld	$25,%ymm5,%ymm5 | 
 | 	vpxor	%ymm3,%ymm5,%ymm5 | 
 | 	vpalignr	$4,%ymm13,%ymm13,%ymm13 | 
 | 	vpalignr	$8,%ymm9,%ymm9,%ymm9 | 
 | 	vpalignr	$12,%ymm5,%ymm5,%ymm5 | 
 | 	addq	0+16(%rdi),%r10 | 
 | 	adcq	8+16(%rdi),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 |  | 
 | 	leaq	32(%rdi),%rdi | 
 | 	decq	%rcx | 
 | 	jg	.Lseal_avx2_tail_256_rounds_and_3xhash | 
 | 	decq	%r8 | 
 | 	jge	.Lseal_avx2_tail_256_rounds_and_2xhash | 
 | 	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1 | 
 | 	vpaddd	0+64(%rbp),%ymm5,%ymm5 | 
 | 	vpaddd	0+96(%rbp),%ymm9,%ymm9 | 
 | 	vpaddd	0+192(%rbp),%ymm13,%ymm13 | 
 | 	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0 | 
 | 	vpaddd	0+64(%rbp),%ymm4,%ymm4 | 
 | 	vpaddd	0+96(%rbp),%ymm8,%ymm8 | 
 | 	vpaddd	0+160(%rbp),%ymm12,%ymm12 | 
 | 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3 | 
 | 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5 | 
 | 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1 | 
 | 	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9 | 
 | 	vpxor	0+0(%rsi),%ymm3,%ymm3 | 
 | 	vpxor	32+0(%rsi),%ymm1,%ymm1 | 
 | 	vpxor	64+0(%rsi),%ymm5,%ymm5 | 
 | 	vpxor	96+0(%rsi),%ymm9,%ymm9 | 
 | 	vmovdqu	%ymm3,0+0(%rdi) | 
 | 	vmovdqu	%ymm1,32+0(%rdi) | 
 | 	vmovdqu	%ymm5,64+0(%rdi) | 
 | 	vmovdqu	%ymm9,96+0(%rdi) | 
 | 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3 | 
 | 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0 | 
 | 	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4 | 
 | 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12 | 
 | 	vmovdqa	%ymm3,%ymm8 | 
 |  | 
 | 	movq	$128,%rcx | 
 | 	leaq	128(%rsi),%rsi | 
 | 	subq	$128,%rbx | 
 | 	jmp	.Lseal_avx2_short_hash_remainder | 
 |  | 
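// Tail for 257-384 remaining bytes: three two-block ChaCha20 states.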
 | .Lseal_avx2_tail_384: | 
 | 	vmovdqa	.Lchacha20_consts(%rip),%ymm0 | 
 | 	vmovdqa	0+64(%rbp),%ymm4 | 
 | 	vmovdqa	0+96(%rbp),%ymm8 | 
 | 	vmovdqa	%ymm0,%ymm1 | 
 | 	vmovdqa	%ymm4,%ymm5 | 
 | 	vmovdqa	%ymm8,%ymm9 | 
 | 	vmovdqa	%ymm0,%ymm2 | 
 | 	vmovdqa	%ymm4,%ymm6 | 
 | 	vmovdqa	%ymm8,%ymm10 | 
 | 	vmovdqa	.Lavx2_inc(%rip),%ymm12 | 
 | 	vpaddd	0+160(%rbp),%ymm12,%ymm14 | 
 | 	vpaddd	%ymm14,%ymm12,%ymm13 | 
 | 	vpaddd	%ymm13,%ymm12,%ymm12 | 
 | 	vmovdqa	%ymm12,0+160(%rbp) | 
 | 	vmovdqa	%ymm13,0+192(%rbp) | 
 | 	vmovdqa	%ymm14,0+224(%rbp) | 
 |  | 
 | .Lseal_avx2_tail_384_rounds_and_3xhash: | 
 | 	addq	0+0(%rdi),%r10 | 
 | 	adcq	8+0(%rdi),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 |  | 
 | 	leaq	16(%rdi),%rdi | 
 | .Lseal_avx2_tail_384_rounds_and_2xhash: | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	.Lrol16(%rip),%ymm12,%ymm12 | 
 | 	vpaddd	%ymm12,%ymm8,%ymm8 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vpsrld	$20,%ymm4,%ymm3 | 
 | 	vpslld	$12,%ymm4,%ymm4 | 
 | 	vpxor	%ymm3,%ymm4,%ymm4 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	.Lrol8(%rip),%ymm12,%ymm12 | 
 | 	vpaddd	%ymm12,%ymm8,%ymm8 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vpslld	$7,%ymm4,%ymm3 | 
 | 	vpsrld	$25,%ymm4,%ymm4 | 
 | 	vpxor	%ymm3,%ymm4,%ymm4 | 
 | 	vpalignr	$12,%ymm12,%ymm12,%ymm12 | 
 | 	vpalignr	$8,%ymm8,%ymm8,%ymm8 | 
 | 	vpalignr	$4,%ymm4,%ymm4,%ymm4 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpshufb	.Lrol16(%rip),%ymm13,%ymm13 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpsrld	$20,%ymm5,%ymm3 | 
 | 	vpslld	$12,%ymm5,%ymm5 | 
 | 	vpxor	%ymm3,%ymm5,%ymm5 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpshufb	.Lrol8(%rip),%ymm13,%ymm13 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpslld	$7,%ymm5,%ymm3 | 
 | 	vpsrld	$25,%ymm5,%ymm5 | 
 | 	vpxor	%ymm3,%ymm5,%ymm5 | 
 | 	vpalignr	$12,%ymm13,%ymm13,%ymm13 | 
 | 	vpalignr	$8,%ymm9,%ymm9,%ymm9 | 
 | 	vpalignr	$4,%ymm5,%ymm5,%ymm5 | 
 | 	addq	0+0(%rdi),%r10 | 
 | 	adcq	8+0(%rdi),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 | 	vpaddd	%ymm6,%ymm2,%ymm2 | 
 | 	vpxor	%ymm2,%ymm14,%ymm14 | 
 | 	vpshufb	.Lrol16(%rip),%ymm14,%ymm14 | 
 | 	vpaddd	%ymm14,%ymm10,%ymm10 | 
 | 	vpxor	%ymm10,%ymm6,%ymm6 | 
 | 	vpsrld	$20,%ymm6,%ymm3 | 
 | 	vpslld	$12,%ymm6,%ymm6 | 
 | 	vpxor	%ymm3,%ymm6,%ymm6 | 
 | 	vpaddd	%ymm6,%ymm2,%ymm2 | 
 | 	vpxor	%ymm2,%ymm14,%ymm14 | 
 | 	vpshufb	.Lrol8(%rip),%ymm14,%ymm14 | 
 | 	vpaddd	%ymm14,%ymm10,%ymm10 | 
 | 	vpxor	%ymm10,%ymm6,%ymm6 | 
 | 	vpslld	$7,%ymm6,%ymm3 | 
 | 	vpsrld	$25,%ymm6,%ymm6 | 
 | 	vpxor	%ymm3,%ymm6,%ymm6 | 
 | 	vpalignr	$12,%ymm14,%ymm14,%ymm14 | 
 | 	vpalignr	$8,%ymm10,%ymm10,%ymm10 | 
 | 	vpalignr	$4,%ymm6,%ymm6,%ymm6 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	.Lrol16(%rip),%ymm12,%ymm12 | 
 | 	vpaddd	%ymm12,%ymm8,%ymm8 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vpsrld	$20,%ymm4,%ymm3 | 
 | 	vpslld	$12,%ymm4,%ymm4 | 
 | 	vpxor	%ymm3,%ymm4,%ymm4 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	.Lrol8(%rip),%ymm12,%ymm12 | 
 | 	vpaddd	%ymm12,%ymm8,%ymm8 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vpslld	$7,%ymm4,%ymm3 | 
 | 	vpsrld	$25,%ymm4,%ymm4 | 
 | 	vpxor	%ymm3,%ymm4,%ymm4 | 
 | 	vpalignr	$4,%ymm12,%ymm12,%ymm12 | 
 | 	vpalignr	$8,%ymm8,%ymm8,%ymm8 | 
 | 	vpalignr	$12,%ymm4,%ymm4,%ymm4 | 
 | 	addq	0+16(%rdi),%r10 | 
 | 	adcq	8+16(%rdi),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpshufb	.Lrol16(%rip),%ymm13,%ymm13 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpsrld	$20,%ymm5,%ymm3 | 
 | 	vpslld	$12,%ymm5,%ymm5 | 
 | 	vpxor	%ymm3,%ymm5,%ymm5 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpshufb	.Lrol8(%rip),%ymm13,%ymm13 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpslld	$7,%ymm5,%ymm3 | 
 | 	vpsrld	$25,%ymm5,%ymm5 | 
 | 	vpxor	%ymm3,%ymm5,%ymm5 | 
 | 	vpalignr	$4,%ymm13,%ymm13,%ymm13 | 
 | 	vpalignr	$8,%ymm9,%ymm9,%ymm9 | 
 | 	vpalignr	$12,%ymm5,%ymm5,%ymm5 | 
 | 	vpaddd	%ymm6,%ymm2,%ymm2 | 
 | 	vpxor	%ymm2,%ymm14,%ymm14 | 
 | 	vpshufb	.Lrol16(%rip),%ymm14,%ymm14 | 
 | 	vpaddd	%ymm14,%ymm10,%ymm10 | 
 | 	vpxor	%ymm10,%ymm6,%ymm6 | 
 | 	vpsrld	$20,%ymm6,%ymm3 | 
 | 	vpslld	$12,%ymm6,%ymm6 | 
 | 	vpxor	%ymm3,%ymm6,%ymm6 | 
 | 	vpaddd	%ymm6,%ymm2,%ymm2 | 
 | 	vpxor	%ymm2,%ymm14,%ymm14 | 
 | 	vpshufb	.Lrol8(%rip),%ymm14,%ymm14 | 
 | 	vpaddd	%ymm14,%ymm10,%ymm10 | 
 | 	vpxor	%ymm10,%ymm6,%ymm6 | 
 | 	vpslld	$7,%ymm6,%ymm3 | 
 | 	vpsrld	$25,%ymm6,%ymm6 | 
 | 	vpxor	%ymm3,%ymm6,%ymm6 | 
 | 	vpalignr	$4,%ymm14,%ymm14,%ymm14 | 
 | 	vpalignr	$8,%ymm10,%ymm10,%ymm10 | 
 | 	vpalignr	$12,%ymm6,%ymm6,%ymm6 | 
 |  | 
 | 	leaq	32(%rdi),%rdi | 
 | 	decq	%rcx | 
 | 	jg	.Lseal_avx2_tail_384_rounds_and_3xhash | 
 | 	decq	%r8 | 
 | 	jge	.Lseal_avx2_tail_384_rounds_and_2xhash | 
 | 	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2 | 
 | 	vpaddd	0+64(%rbp),%ymm6,%ymm6 | 
 | 	vpaddd	0+96(%rbp),%ymm10,%ymm10 | 
 | 	vpaddd	0+224(%rbp),%ymm14,%ymm14 | 
 | 	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1 | 
 | 	vpaddd	0+64(%rbp),%ymm5,%ymm5 | 
 | 	vpaddd	0+96(%rbp),%ymm9,%ymm9 | 
 | 	vpaddd	0+192(%rbp),%ymm13,%ymm13 | 
 | 	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0 | 
 | 	vpaddd	0+64(%rbp),%ymm4,%ymm4 | 
 | 	vpaddd	0+96(%rbp),%ymm8,%ymm8 | 
 | 	vpaddd	0+160(%rbp),%ymm12,%ymm12 | 
 | 	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3 | 
 | 	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6 | 
 | 	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2 | 
 | 	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10 | 
 | 	vpxor	0+0(%rsi),%ymm3,%ymm3 | 
 | 	vpxor	32+0(%rsi),%ymm2,%ymm2 | 
 | 	vpxor	64+0(%rsi),%ymm6,%ymm6 | 
 | 	vpxor	96+0(%rsi),%ymm10,%ymm10 | 
 | 	vmovdqu	%ymm3,0+0(%rdi) | 
 | 	vmovdqu	%ymm2,32+0(%rdi) | 
 | 	vmovdqu	%ymm6,64+0(%rdi) | 
 | 	vmovdqu	%ymm10,96+0(%rdi) | 
 | 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3 | 
 | 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5 | 
 | 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1 | 
 | 	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9 | 
 | 	vpxor	0+128(%rsi),%ymm3,%ymm3 | 
 | 	vpxor	32+128(%rsi),%ymm1,%ymm1 | 
 | 	vpxor	64+128(%rsi),%ymm5,%ymm5 | 
 | 	vpxor	96+128(%rsi),%ymm9,%ymm9 | 
 | 	vmovdqu	%ymm3,0+128(%rdi) | 
 | 	vmovdqu	%ymm1,32+128(%rdi) | 
 | 	vmovdqu	%ymm5,64+128(%rdi) | 
 | 	vmovdqu	%ymm9,96+128(%rdi) | 
 | 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3 | 
 | 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0 | 
 | 	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4 | 
 | 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12 | 
 | 	vmovdqa	%ymm3,%ymm8 | 
 |  | 
 | 	movq	$256,%rcx | 
 | 	leaq	256(%rsi),%rsi | 
 | 	subq	$256,%rbx | 
 | 	jmp	.Lseal_avx2_short_hash_remainder | 
 |  | 
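// Tail for 385-512 remaining bytes: four two-block ChaCha20 states, with the
// %ymm8 row again spilled to 128(%rbp) during the rounds.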
 | .Lseal_avx2_tail_512: | 
 | 	vmovdqa	.Lchacha20_consts(%rip),%ymm0 | 
 | 	vmovdqa	0+64(%rbp),%ymm4 | 
 | 	vmovdqa	0+96(%rbp),%ymm8 | 
 | 	vmovdqa	%ymm0,%ymm1 | 
 | 	vmovdqa	%ymm4,%ymm5 | 
 | 	vmovdqa	%ymm8,%ymm9 | 
 | 	vmovdqa	%ymm0,%ymm2 | 
 | 	vmovdqa	%ymm4,%ymm6 | 
 | 	vmovdqa	%ymm8,%ymm10 | 
 | 	vmovdqa	%ymm0,%ymm3 | 
 | 	vmovdqa	%ymm4,%ymm7 | 
 | 	vmovdqa	%ymm8,%ymm11 | 
 | 	vmovdqa	.Lavx2_inc(%rip),%ymm12 | 
 | 	vpaddd	0+160(%rbp),%ymm12,%ymm15 | 
 | 	vpaddd	%ymm15,%ymm12,%ymm14 | 
 | 	vpaddd	%ymm14,%ymm12,%ymm13 | 
 | 	vpaddd	%ymm13,%ymm12,%ymm12 | 
 | 	vmovdqa	%ymm15,0+256(%rbp) | 
 | 	vmovdqa	%ymm14,0+224(%rbp) | 
 | 	vmovdqa	%ymm13,0+192(%rbp) | 
 | 	vmovdqa	%ymm12,0+160(%rbp) | 
 |  | 
 | .Lseal_avx2_tail_512_rounds_and_3xhash: | 
 | 	addq	0+0(%rdi),%r10 | 
 | 	adcq	8+0(%rdi),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rdx | 
 | 	movq	%rdx,%r15 | 
 | 	mulxq	%r10,%r13,%r14 | 
 | 	mulxq	%r11,%rax,%rdx | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rdx | 
 | 	mulxq	%r10,%r10,%rax | 
 | 	addq	%r10,%r14 | 
 | 	mulxq	%r11,%r11,%r9 | 
 | 	adcq	%r11,%r15 | 
 | 	adcq	$0,%r9 | 
 | 	imulq	%r12,%rdx | 
 | 	addq	%rax,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 |  | 
 | 	leaq	16(%rdi),%rdi | 
 | .Lseal_avx2_tail_512_rounds_and_2xhash: | 
 | 	vmovdqa	%ymm8,0+128(%rbp) | 
 | 	vmovdqa	.Lrol16(%rip),%ymm8 | 
 | 	vpaddd	%ymm7,%ymm3,%ymm3 | 
 | 	vpaddd	%ymm6,%ymm2,%ymm2 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm3,%ymm15,%ymm15 | 
 | 	vpxor	%ymm2,%ymm14,%ymm14 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	%ymm8,%ymm15,%ymm15 | 
 | 	vpshufb	%ymm8,%ymm14,%ymm14 | 
 | 	vpshufb	%ymm8,%ymm13,%ymm13 | 
 | 	vpshufb	%ymm8,%ymm12,%ymm12 | 
 | 	vpaddd	%ymm15,%ymm11,%ymm11 | 
 | 	vpaddd	%ymm14,%ymm10,%ymm10 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpaddd	0+128(%rbp),%ymm12,%ymm8 | 
 | 	vpxor	%ymm11,%ymm7,%ymm7 | 
 | 	vpxor	%ymm10,%ymm6,%ymm6 | 
 | 	addq	0+0(%rdi),%r10 | 
 | 	adcq	8+0(%rdi),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	%ymm8,0+128(%rbp) | 
 | 	vpsrld	$20,%ymm7,%ymm8 | 
 | 	vpslld	$32-20,%ymm7,%ymm7 | 
 | 	vpxor	%ymm8,%ymm7,%ymm7 | 
 | 	vpsrld	$20,%ymm6,%ymm8 | 
 | 	vpslld	$32-20,%ymm6,%ymm6 | 
 | 	vpxor	%ymm8,%ymm6,%ymm6 | 
 | 	vpsrld	$20,%ymm5,%ymm8 | 
 | 	vpslld	$32-20,%ymm5,%ymm5 | 
 | 	vpxor	%ymm8,%ymm5,%ymm5 | 
 | 	vpsrld	$20,%ymm4,%ymm8 | 
 | 	vpslld	$32-20,%ymm4,%ymm4 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	.Lrol8(%rip),%ymm8 | 
 | 	vpaddd	%ymm7,%ymm3,%ymm3 | 
 | 	vpaddd	%ymm6,%ymm2,%ymm2 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	movq	0+0+0(%rbp),%rdx | 
 | 	movq	%rdx,%r15 | 
 | 	mulxq	%r10,%r13,%r14 | 
 | 	mulxq	%r11,%rax,%rdx | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	vpxor	%ymm3,%ymm15,%ymm15 | 
 | 	vpxor	%ymm2,%ymm14,%ymm14 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	%ymm8,%ymm15,%ymm15 | 
 | 	vpshufb	%ymm8,%ymm14,%ymm14 | 
 | 	vpshufb	%ymm8,%ymm13,%ymm13 | 
 | 	vpshufb	%ymm8,%ymm12,%ymm12 | 
 | 	vpaddd	%ymm15,%ymm11,%ymm11 | 
 | 	vpaddd	%ymm14,%ymm10,%ymm10 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpaddd	0+128(%rbp),%ymm12,%ymm8 | 
 | 	vpxor	%ymm11,%ymm7,%ymm7 | 
 | 	vpxor	%ymm10,%ymm6,%ymm6 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	%ymm8,0+128(%rbp) | 
 | 	vpsrld	$25,%ymm7,%ymm8 | 
 | 	vpslld	$32-25,%ymm7,%ymm7 | 
 | 	vpxor	%ymm8,%ymm7,%ymm7 | 
 | 	movq	8+0+0(%rbp),%rdx | 
 | 	mulxq	%r10,%r10,%rax | 
 | 	addq	%r10,%r14 | 
 | 	mulxq	%r11,%r11,%r9 | 
 | 	adcq	%r11,%r15 | 
 | 	adcq	$0,%r9 | 
 | 	imulq	%r12,%rdx | 
 | 	vpsrld	$25,%ymm6,%ymm8 | 
 | 	vpslld	$32-25,%ymm6,%ymm6 | 
 | 	vpxor	%ymm8,%ymm6,%ymm6 | 
 | 	vpsrld	$25,%ymm5,%ymm8 | 
 | 	vpslld	$32-25,%ymm5,%ymm5 | 
 | 	vpxor	%ymm8,%ymm5,%ymm5 | 
 | 	vpsrld	$25,%ymm4,%ymm8 | 
 | 	vpslld	$32-25,%ymm4,%ymm4 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	0+128(%rbp),%ymm8 | 
 | 	vpalignr	$4,%ymm7,%ymm7,%ymm7 | 
 | 	vpalignr	$8,%ymm11,%ymm11,%ymm11 | 
 | 	vpalignr	$12,%ymm15,%ymm15,%ymm15 | 
 | 	vpalignr	$4,%ymm6,%ymm6,%ymm6 | 
 | 	vpalignr	$8,%ymm10,%ymm10,%ymm10 | 
 | 	vpalignr	$12,%ymm14,%ymm14,%ymm14 | 
 | 	vpalignr	$4,%ymm5,%ymm5,%ymm5 | 
 | 	vpalignr	$8,%ymm9,%ymm9,%ymm9 | 
 | 	vpalignr	$12,%ymm13,%ymm13,%ymm13 | 
 | 	vpalignr	$4,%ymm4,%ymm4,%ymm4 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	vpalignr	$8,%ymm8,%ymm8,%ymm8 | 
 | 	vpalignr	$12,%ymm12,%ymm12,%ymm12 | 
 | 	vmovdqa	%ymm8,0+128(%rbp) | 
 | 	vmovdqa	.Lrol16(%rip),%ymm8 | 
 | 	vpaddd	%ymm7,%ymm3,%ymm3 | 
 | 	vpaddd	%ymm6,%ymm2,%ymm2 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm3,%ymm15,%ymm15 | 
 | 	vpxor	%ymm2,%ymm14,%ymm14 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	%ymm8,%ymm15,%ymm15 | 
 | 	vpshufb	%ymm8,%ymm14,%ymm14 | 
 | 	vpshufb	%ymm8,%ymm13,%ymm13 | 
 | 	vpshufb	%ymm8,%ymm12,%ymm12 | 
 | 	vpaddd	%ymm15,%ymm11,%ymm11 | 
 | 	vpaddd	%ymm14,%ymm10,%ymm10 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpaddd	0+128(%rbp),%ymm12,%ymm8 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 | 	vpxor	%ymm11,%ymm7,%ymm7 | 
 | 	vpxor	%ymm10,%ymm6,%ymm6 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	%ymm8,0+128(%rbp) | 
 | 	vpsrld	$20,%ymm7,%ymm8 | 
 | 	vpslld	$32-20,%ymm7,%ymm7 | 
 | 	vpxor	%ymm8,%ymm7,%ymm7 | 
 | 	vpsrld	$20,%ymm6,%ymm8 | 
 | 	vpslld	$32-20,%ymm6,%ymm6 | 
 | 	vpxor	%ymm8,%ymm6,%ymm6 | 
 | 	vpsrld	$20,%ymm5,%ymm8 | 
 | 	vpslld	$32-20,%ymm5,%ymm5 | 
 | 	vpxor	%ymm8,%ymm5,%ymm5 | 
 | 	vpsrld	$20,%ymm4,%ymm8 | 
 | 	vpslld	$32-20,%ymm4,%ymm4 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	.Lrol8(%rip),%ymm8 | 
 | 	vpaddd	%ymm7,%ymm3,%ymm3 | 
 | 	vpaddd	%ymm6,%ymm2,%ymm2 | 
 | 	addq	0+16(%rdi),%r10 | 
 | 	adcq	8+16(%rdi),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm3,%ymm15,%ymm15 | 
 | 	vpxor	%ymm2,%ymm14,%ymm14 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	%ymm8,%ymm15,%ymm15 | 
 | 	vpshufb	%ymm8,%ymm14,%ymm14 | 
 | 	vpshufb	%ymm8,%ymm13,%ymm13 | 
 | 	vpshufb	%ymm8,%ymm12,%ymm12 | 
 | 	vpaddd	%ymm15,%ymm11,%ymm11 | 
 | 	vpaddd	%ymm14,%ymm10,%ymm10 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpaddd	0+128(%rbp),%ymm12,%ymm8 | 
 | 	vpxor	%ymm11,%ymm7,%ymm7 | 
 | 	vpxor	%ymm10,%ymm6,%ymm6 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	%ymm8,0+128(%rbp) | 
 | 	vpsrld	$25,%ymm7,%ymm8 | 
 | 	movq	0+0+0(%rbp),%rdx | 
 | 	movq	%rdx,%r15 | 
 | 	mulxq	%r10,%r13,%r14 | 
 | 	mulxq	%r11,%rax,%rdx | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	vpslld	$32-25,%ymm7,%ymm7 | 
 | 	vpxor	%ymm8,%ymm7,%ymm7 | 
 | 	vpsrld	$25,%ymm6,%ymm8 | 
 | 	vpslld	$32-25,%ymm6,%ymm6 | 
 | 	vpxor	%ymm8,%ymm6,%ymm6 | 
 | 	vpsrld	$25,%ymm5,%ymm8 | 
 | 	vpslld	$32-25,%ymm5,%ymm5 | 
 | 	vpxor	%ymm8,%ymm5,%ymm5 | 
 | 	vpsrld	$25,%ymm4,%ymm8 | 
 | 	vpslld	$32-25,%ymm4,%ymm4 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vmovdqa	0+128(%rbp),%ymm8 | 
 | 	vpalignr	$12,%ymm7,%ymm7,%ymm7 | 
 | 	vpalignr	$8,%ymm11,%ymm11,%ymm11 | 
 | 	vpalignr	$4,%ymm15,%ymm15,%ymm15 | 
 | 	vpalignr	$12,%ymm6,%ymm6,%ymm6 | 
 | 	vpalignr	$8,%ymm10,%ymm10,%ymm10 | 
 | 	vpalignr	$4,%ymm14,%ymm14,%ymm14 | 
 | 	vpalignr	$12,%ymm5,%ymm5,%ymm5 | 
 | 	vpalignr	$8,%ymm9,%ymm9,%ymm9 | 
 | 	movq	8+0+0(%rbp),%rdx | 
 | 	mulxq	%r10,%r10,%rax | 
 | 	addq	%r10,%r14 | 
 | 	mulxq	%r11,%r11,%r9 | 
 | 	adcq	%r11,%r15 | 
 | 	adcq	$0,%r9 | 
 | 	imulq	%r12,%rdx | 
 | 	vpalignr	$4,%ymm13,%ymm13,%ymm13 | 
 | 	vpalignr	$12,%ymm4,%ymm4,%ymm4 | 
 | 	vpalignr	$8,%ymm8,%ymm8,%ymm8 | 
 | 	vpalignr	$4,%ymm12,%ymm12,%ymm12 | 
 |  | 
 |  | 
 |  | 
 |  | 
 |  | 
 |  | 
 |  | 
 |  | 
 |  | 
 |  | 
 |  | 
 |  | 
 |  | 
 |  | 
 |  | 
 |  | 
 | 	addq	%rax,%r15 | 
 | 	adcq	%rdx,%r9 | 
 |  | 
 |  | 
 |  | 
 |  | 
 |  | 
 |  | 
 |  | 
 |  | 
 |  | 
 |  | 
 |  | 
 |  | 
 |  | 
 |  | 
 |  | 
 |  | 
 |  | 
 |  | 
 |  | 
 |  | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 |  | 
 | 	leaq	32(%rdi),%rdi | 
 | 	decq	%rcx | 
 | 	jg	.Lseal_avx2_tail_512_rounds_and_3xhash | 
 | 	decq	%r8 | 
 | 	jge	.Lseal_avx2_tail_512_rounds_and_2xhash | 
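// Tail-512 finalization: add the saved initial state back into the working
// state (ChaCha20 feed-forward), regroup register pairs into contiguous
// 64-byte blocks with vperm2i128, then XOR and write the first 384 bytes.
// The last 128 bytes of keystream are left in %ymm0,%ymm4,%ymm8,%ymm12 for
// the short loop, and %rcx records the 384 ciphertext bytes still to hash.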
 | 	vpaddd	.Lchacha20_consts(%rip),%ymm3,%ymm3 | 
 | 	vpaddd	0+64(%rbp),%ymm7,%ymm7 | 
 | 	vpaddd	0+96(%rbp),%ymm11,%ymm11 | 
 | 	vpaddd	0+256(%rbp),%ymm15,%ymm15 | 
 | 	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2 | 
 | 	vpaddd	0+64(%rbp),%ymm6,%ymm6 | 
 | 	vpaddd	0+96(%rbp),%ymm10,%ymm10 | 
 | 	vpaddd	0+224(%rbp),%ymm14,%ymm14 | 
 | 	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1 | 
 | 	vpaddd	0+64(%rbp),%ymm5,%ymm5 | 
 | 	vpaddd	0+96(%rbp),%ymm9,%ymm9 | 
 | 	vpaddd	0+192(%rbp),%ymm13,%ymm13 | 
 | 	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0 | 
 | 	vpaddd	0+64(%rbp),%ymm4,%ymm4 | 
 | 	vpaddd	0+96(%rbp),%ymm8,%ymm8 | 
 | 	vpaddd	0+160(%rbp),%ymm12,%ymm12 | 
 |  | 
 | 	vmovdqa	%ymm0,0+128(%rbp) | 
 | 	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0 | 
 | 	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7 | 
 | 	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3 | 
 | 	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11 | 
 | 	vpxor	0+0(%rsi),%ymm0,%ymm0 | 
 | 	vpxor	32+0(%rsi),%ymm3,%ymm3 | 
 | 	vpxor	64+0(%rsi),%ymm7,%ymm7 | 
 | 	vpxor	96+0(%rsi),%ymm11,%ymm11 | 
 | 	vmovdqu	%ymm0,0+0(%rdi) | 
 | 	vmovdqu	%ymm3,32+0(%rdi) | 
 | 	vmovdqu	%ymm7,64+0(%rdi) | 
 | 	vmovdqu	%ymm11,96+0(%rdi) | 
 |  | 
 | 	vmovdqa	0+128(%rbp),%ymm0 | 
 | 	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3 | 
 | 	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6 | 
 | 	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2 | 
 | 	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10 | 
 | 	vpxor	0+128(%rsi),%ymm3,%ymm3 | 
 | 	vpxor	32+128(%rsi),%ymm2,%ymm2 | 
 | 	vpxor	64+128(%rsi),%ymm6,%ymm6 | 
 | 	vpxor	96+128(%rsi),%ymm10,%ymm10 | 
 | 	vmovdqu	%ymm3,0+128(%rdi) | 
 | 	vmovdqu	%ymm2,32+128(%rdi) | 
 | 	vmovdqu	%ymm6,64+128(%rdi) | 
 | 	vmovdqu	%ymm10,96+128(%rdi) | 
 | 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3 | 
 | 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5 | 
 | 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1 | 
 | 	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9 | 
 | 	vpxor	0+256(%rsi),%ymm3,%ymm3 | 
 | 	vpxor	32+256(%rsi),%ymm1,%ymm1 | 
 | 	vpxor	64+256(%rsi),%ymm5,%ymm5 | 
 | 	vpxor	96+256(%rsi),%ymm9,%ymm9 | 
 | 	vmovdqu	%ymm3,0+256(%rdi) | 
 | 	vmovdqu	%ymm1,32+256(%rdi) | 
 | 	vmovdqu	%ymm5,64+256(%rdi) | 
 | 	vmovdqu	%ymm9,96+256(%rdi) | 
 | 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3 | 
 | 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0 | 
 | 	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4 | 
 | 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12 | 
 | 	vmovdqa	%ymm3,%ymm8 | 
 |  | 
 | 	movq	$384,%rcx | 
 | 	leaq	384(%rsi),%rsi | 
 | 	subq	$384,%rbx | 
 | 	jmp	.Lseal_avx2_short_hash_remainder | 
 |  | 
 | .Lseal_avx2_320: | 
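// Short-input path: three interleaved two-block ChaCha20 states (six 64-byte
// blocks of keystream); the first block supplies the Poly1305 key, which
// leaves keystream for plaintexts of up to 320 bytes, per the label.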
 | 	vmovdqa	%ymm0,%ymm1 | 
 | 	vmovdqa	%ymm0,%ymm2 | 
 | 	vmovdqa	%ymm4,%ymm5 | 
 | 	vmovdqa	%ymm4,%ymm6 | 
 | 	vmovdqa	%ymm8,%ymm9 | 
 | 	vmovdqa	%ymm8,%ymm10 | 
 | 	vpaddd	.Lavx2_inc(%rip),%ymm12,%ymm13 | 
 | 	vpaddd	.Lavx2_inc(%rip),%ymm13,%ymm14 | 
 | 	vmovdqa	%ymm4,%ymm7 | 
 | 	vmovdqa	%ymm8,%ymm11 | 
 | 	vmovdqa	%ymm12,0+160(%rbp) | 
 | 	vmovdqa	%ymm13,0+192(%rbp) | 
 | 	vmovdqa	%ymm14,0+224(%rbp) | 
 | 	movq	$10,%r10 | 
 | .Lseal_avx2_320_rounds: | 
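// One ChaCha20 double round per iteration for each of the three states:
// a column round, then (after the vpalignr rotations) a diagonal round.
// Ten iterations give the full 20 rounds.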
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	.Lrol16(%rip),%ymm12,%ymm12 | 
 | 	vpaddd	%ymm12,%ymm8,%ymm8 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vpsrld	$20,%ymm4,%ymm3 | 
 | 	vpslld	$12,%ymm4,%ymm4 | 
 | 	vpxor	%ymm3,%ymm4,%ymm4 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	.Lrol8(%rip),%ymm12,%ymm12 | 
 | 	vpaddd	%ymm12,%ymm8,%ymm8 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vpslld	$7,%ymm4,%ymm3 | 
 | 	vpsrld	$25,%ymm4,%ymm4 | 
 | 	vpxor	%ymm3,%ymm4,%ymm4 | 
 | 	vpalignr	$12,%ymm12,%ymm12,%ymm12 | 
 | 	vpalignr	$8,%ymm8,%ymm8,%ymm8 | 
 | 	vpalignr	$4,%ymm4,%ymm4,%ymm4 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpshufb	.Lrol16(%rip),%ymm13,%ymm13 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpsrld	$20,%ymm5,%ymm3 | 
 | 	vpslld	$12,%ymm5,%ymm5 | 
 | 	vpxor	%ymm3,%ymm5,%ymm5 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpshufb	.Lrol8(%rip),%ymm13,%ymm13 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpslld	$7,%ymm5,%ymm3 | 
 | 	vpsrld	$25,%ymm5,%ymm5 | 
 | 	vpxor	%ymm3,%ymm5,%ymm5 | 
 | 	vpalignr	$12,%ymm13,%ymm13,%ymm13 | 
 | 	vpalignr	$8,%ymm9,%ymm9,%ymm9 | 
 | 	vpalignr	$4,%ymm5,%ymm5,%ymm5 | 
 | 	vpaddd	%ymm6,%ymm2,%ymm2 | 
 | 	vpxor	%ymm2,%ymm14,%ymm14 | 
 | 	vpshufb	.Lrol16(%rip),%ymm14,%ymm14 | 
 | 	vpaddd	%ymm14,%ymm10,%ymm10 | 
 | 	vpxor	%ymm10,%ymm6,%ymm6 | 
 | 	vpsrld	$20,%ymm6,%ymm3 | 
 | 	vpslld	$12,%ymm6,%ymm6 | 
 | 	vpxor	%ymm3,%ymm6,%ymm6 | 
 | 	vpaddd	%ymm6,%ymm2,%ymm2 | 
 | 	vpxor	%ymm2,%ymm14,%ymm14 | 
 | 	vpshufb	.Lrol8(%rip),%ymm14,%ymm14 | 
 | 	vpaddd	%ymm14,%ymm10,%ymm10 | 
 | 	vpxor	%ymm10,%ymm6,%ymm6 | 
 | 	vpslld	$7,%ymm6,%ymm3 | 
 | 	vpsrld	$25,%ymm6,%ymm6 | 
 | 	vpxor	%ymm3,%ymm6,%ymm6 | 
 | 	vpalignr	$12,%ymm14,%ymm14,%ymm14 | 
 | 	vpalignr	$8,%ymm10,%ymm10,%ymm10 | 
 | 	vpalignr	$4,%ymm6,%ymm6,%ymm6 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	.Lrol16(%rip),%ymm12,%ymm12 | 
 | 	vpaddd	%ymm12,%ymm8,%ymm8 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vpsrld	$20,%ymm4,%ymm3 | 
 | 	vpslld	$12,%ymm4,%ymm4 | 
 | 	vpxor	%ymm3,%ymm4,%ymm4 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	.Lrol8(%rip),%ymm12,%ymm12 | 
 | 	vpaddd	%ymm12,%ymm8,%ymm8 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vpslld	$7,%ymm4,%ymm3 | 
 | 	vpsrld	$25,%ymm4,%ymm4 | 
 | 	vpxor	%ymm3,%ymm4,%ymm4 | 
 | 	vpalignr	$4,%ymm12,%ymm12,%ymm12 | 
 | 	vpalignr	$8,%ymm8,%ymm8,%ymm8 | 
 | 	vpalignr	$12,%ymm4,%ymm4,%ymm4 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpshufb	.Lrol16(%rip),%ymm13,%ymm13 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpsrld	$20,%ymm5,%ymm3 | 
 | 	vpslld	$12,%ymm5,%ymm5 | 
 | 	vpxor	%ymm3,%ymm5,%ymm5 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpshufb	.Lrol8(%rip),%ymm13,%ymm13 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpslld	$7,%ymm5,%ymm3 | 
 | 	vpsrld	$25,%ymm5,%ymm5 | 
 | 	vpxor	%ymm3,%ymm5,%ymm5 | 
 | 	vpalignr	$4,%ymm13,%ymm13,%ymm13 | 
 | 	vpalignr	$8,%ymm9,%ymm9,%ymm9 | 
 | 	vpalignr	$12,%ymm5,%ymm5,%ymm5 | 
 | 	vpaddd	%ymm6,%ymm2,%ymm2 | 
 | 	vpxor	%ymm2,%ymm14,%ymm14 | 
 | 	vpshufb	.Lrol16(%rip),%ymm14,%ymm14 | 
 | 	vpaddd	%ymm14,%ymm10,%ymm10 | 
 | 	vpxor	%ymm10,%ymm6,%ymm6 | 
 | 	vpsrld	$20,%ymm6,%ymm3 | 
 | 	vpslld	$12,%ymm6,%ymm6 | 
 | 	vpxor	%ymm3,%ymm6,%ymm6 | 
 | 	vpaddd	%ymm6,%ymm2,%ymm2 | 
 | 	vpxor	%ymm2,%ymm14,%ymm14 | 
 | 	vpshufb	.Lrol8(%rip),%ymm14,%ymm14 | 
 | 	vpaddd	%ymm14,%ymm10,%ymm10 | 
 | 	vpxor	%ymm10,%ymm6,%ymm6 | 
 | 	vpslld	$7,%ymm6,%ymm3 | 
 | 	vpsrld	$25,%ymm6,%ymm6 | 
 | 	vpxor	%ymm3,%ymm6,%ymm6 | 
 | 	vpalignr	$4,%ymm14,%ymm14,%ymm14 | 
 | 	vpalignr	$8,%ymm10,%ymm10,%ymm10 | 
 | 	vpalignr	$12,%ymm6,%ymm6,%ymm6 | 
 |  | 
 | 	decq	%r10 | 
 | 	jne	.Lseal_avx2_320_rounds | 
 | 	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0 | 
 | 	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1 | 
 | 	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2 | 
 | 	vpaddd	%ymm7,%ymm4,%ymm4 | 
 | 	vpaddd	%ymm7,%ymm5,%ymm5 | 
 | 	vpaddd	%ymm7,%ymm6,%ymm6 | 
 | 	vpaddd	%ymm11,%ymm8,%ymm8 | 
 | 	vpaddd	%ymm11,%ymm9,%ymm9 | 
 | 	vpaddd	%ymm11,%ymm10,%ymm10 | 
 | 	vpaddd	0+160(%rbp),%ymm12,%ymm12 | 
 | 	vpaddd	0+192(%rbp),%ymm13,%ymm13 | 
 | 	vpaddd	0+224(%rbp),%ymm14,%ymm14 | 
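// Extract the first 32 bytes of keystream, clamp r, and store r||s at 0(%rbp)
// as the Poly1305 key; the remaining half of that block is not used.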
 | 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3 | 
 |  | 
 | 	vpand	.Lclamp(%rip),%ymm3,%ymm3 | 
 | 	vmovdqa	%ymm3,0+0(%rbp) | 
 |  | 
 | 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0 | 
 | 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4 | 
 | 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8 | 
 | 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12 | 
 | 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1 | 
 | 	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5 | 
 | 	vperm2i128	$0x02,%ymm2,%ymm6,%ymm9 | 
 | 	vperm2i128	$0x02,%ymm10,%ymm14,%ymm13 | 
 | 	vperm2i128	$0x13,%ymm2,%ymm6,%ymm2 | 
 | 	vperm2i128	$0x13,%ymm10,%ymm14,%ymm6 | 
 | 	jmp	.Lseal_avx2_short | 
 |  | 
 | .Lseal_avx2_192: | 
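// Smallest AVX2 path: two interleaved two-block ChaCha20 states (four 64-byte
// blocks of keystream); block 0 supplies the Poly1305 key, leaving keystream
// for plaintexts of up to 192 bytes, per the label.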
 | 	vmovdqa	%ymm0,%ymm1 | 
 | 	vmovdqa	%ymm0,%ymm2 | 
 | 	vmovdqa	%ymm4,%ymm5 | 
 | 	vmovdqa	%ymm4,%ymm6 | 
 | 	vmovdqa	%ymm8,%ymm9 | 
 | 	vmovdqa	%ymm8,%ymm10 | 
 | 	vpaddd	.Lavx2_inc(%rip),%ymm12,%ymm13 | 
 | 	vmovdqa	%ymm12,%ymm11 | 
 | 	vmovdqa	%ymm13,%ymm15 | 
 | 	movq	$10,%r10 | 
 | .Lseal_avx2_192_rounds: | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	.Lrol16(%rip),%ymm12,%ymm12 | 
 | 	vpaddd	%ymm12,%ymm8,%ymm8 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vpsrld	$20,%ymm4,%ymm3 | 
 | 	vpslld	$12,%ymm4,%ymm4 | 
 | 	vpxor	%ymm3,%ymm4,%ymm4 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	.Lrol8(%rip),%ymm12,%ymm12 | 
 | 	vpaddd	%ymm12,%ymm8,%ymm8 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vpslld	$7,%ymm4,%ymm3 | 
 | 	vpsrld	$25,%ymm4,%ymm4 | 
 | 	vpxor	%ymm3,%ymm4,%ymm4 | 
 | 	vpalignr	$12,%ymm12,%ymm12,%ymm12 | 
 | 	vpalignr	$8,%ymm8,%ymm8,%ymm8 | 
 | 	vpalignr	$4,%ymm4,%ymm4,%ymm4 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpshufb	.Lrol16(%rip),%ymm13,%ymm13 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpsrld	$20,%ymm5,%ymm3 | 
 | 	vpslld	$12,%ymm5,%ymm5 | 
 | 	vpxor	%ymm3,%ymm5,%ymm5 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpshufb	.Lrol8(%rip),%ymm13,%ymm13 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpslld	$7,%ymm5,%ymm3 | 
 | 	vpsrld	$25,%ymm5,%ymm5 | 
 | 	vpxor	%ymm3,%ymm5,%ymm5 | 
 | 	vpalignr	$12,%ymm13,%ymm13,%ymm13 | 
 | 	vpalignr	$8,%ymm9,%ymm9,%ymm9 | 
 | 	vpalignr	$4,%ymm5,%ymm5,%ymm5 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	.Lrol16(%rip),%ymm12,%ymm12 | 
 | 	vpaddd	%ymm12,%ymm8,%ymm8 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vpsrld	$20,%ymm4,%ymm3 | 
 | 	vpslld	$12,%ymm4,%ymm4 | 
 | 	vpxor	%ymm3,%ymm4,%ymm4 | 
 | 	vpaddd	%ymm4,%ymm0,%ymm0 | 
 | 	vpxor	%ymm0,%ymm12,%ymm12 | 
 | 	vpshufb	.Lrol8(%rip),%ymm12,%ymm12 | 
 | 	vpaddd	%ymm12,%ymm8,%ymm8 | 
 | 	vpxor	%ymm8,%ymm4,%ymm4 | 
 | 	vpslld	$7,%ymm4,%ymm3 | 
 | 	vpsrld	$25,%ymm4,%ymm4 | 
 | 	vpxor	%ymm3,%ymm4,%ymm4 | 
 | 	vpalignr	$4,%ymm12,%ymm12,%ymm12 | 
 | 	vpalignr	$8,%ymm8,%ymm8,%ymm8 | 
 | 	vpalignr	$12,%ymm4,%ymm4,%ymm4 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpshufb	.Lrol16(%rip),%ymm13,%ymm13 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpsrld	$20,%ymm5,%ymm3 | 
 | 	vpslld	$12,%ymm5,%ymm5 | 
 | 	vpxor	%ymm3,%ymm5,%ymm5 | 
 | 	vpaddd	%ymm5,%ymm1,%ymm1 | 
 | 	vpxor	%ymm1,%ymm13,%ymm13 | 
 | 	vpshufb	.Lrol8(%rip),%ymm13,%ymm13 | 
 | 	vpaddd	%ymm13,%ymm9,%ymm9 | 
 | 	vpxor	%ymm9,%ymm5,%ymm5 | 
 | 	vpslld	$7,%ymm5,%ymm3 | 
 | 	vpsrld	$25,%ymm5,%ymm5 | 
 | 	vpxor	%ymm3,%ymm5,%ymm5 | 
 | 	vpalignr	$4,%ymm13,%ymm13,%ymm13 | 
 | 	vpalignr	$8,%ymm9,%ymm9,%ymm9 | 
 | 	vpalignr	$12,%ymm5,%ymm5,%ymm5 | 
 |  | 
 | 	decq	%r10 | 
 | 	jne	.Lseal_avx2_192_rounds | 
 | 	vpaddd	%ymm2,%ymm0,%ymm0 | 
 | 	vpaddd	%ymm2,%ymm1,%ymm1 | 
 | 	vpaddd	%ymm6,%ymm4,%ymm4 | 
 | 	vpaddd	%ymm6,%ymm5,%ymm5 | 
 | 	vpaddd	%ymm10,%ymm8,%ymm8 | 
 | 	vpaddd	%ymm10,%ymm9,%ymm9 | 
 | 	vpaddd	%ymm11,%ymm12,%ymm12 | 
 | 	vpaddd	%ymm15,%ymm13,%ymm13 | 
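// Block 0 again supplies the Poly1305 key: clamp r and store r||s at 0(%rbp).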
 | 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3 | 
 |  | 
 | 	vpand	.Lclamp(%rip),%ymm3,%ymm3 | 
 | 	vmovdqa	%ymm3,0+0(%rbp) | 
 |  | 
 | 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0 | 
 | 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4 | 
 | 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8 | 
 | 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12 | 
 | 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1 | 
 | 	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5 | 
 | .Lseal_avx2_short: | 
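// Hash the additional data, then encrypt and hash the remaining input.  The
// movq %r8,%r8 below appears to be a no-op left by the generator (the AD
// length is already in %r8).  After the call, %rcx is reset and reused to
// count ciphertext bytes that still need to be hashed.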
 | 	movq	%r8,%r8 | 
 | 	call	poly_hash_ad_internal | 
 | 	xorq	%rcx,%rcx | 
 | .Lseal_avx2_short_hash_remainder: | 
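// Absorb any ciphertext that was written but not yet hashed, 16 bytes per
// iteration; %rcx holds the number of pending bytes and %rdi points at them.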
 | 	cmpq	$16,%rcx | 
 | 	jb	.Lseal_avx2_short_loop | 
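// One Poly1305 block: add the 16 ciphertext bytes plus the 2^128 pad bit to
// the accumulator, multiply by r (at 0(%rbp)/8(%rbp)) with schoolbook mulq,
// and reduce modulo 2^130 - 5.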
 | 	addq	0+0(%rdi),%r10 | 
 | 	adcq	8+0(%rdi),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 |  | 
 | 	subq	$16,%rcx | 
 | 	addq	$16,%rdi | 
 | 	jmp	.Lseal_avx2_short_hash_remainder | 
 | .Lseal_avx2_short_loop: | 
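// Encrypt 32 bytes at a time with the keystream in %ymm0, hash the two
// resulting 16-byte ciphertext blocks, then rotate the keystream register
// queue (%ymm4 -> %ymm0, %ymm8 -> %ymm4, ...) for the next chunk.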
 | 	cmpq	$32,%rbx | 
 | 	jb	.Lseal_avx2_short_tail | 
 | 	subq	$32,%rbx | 
 |  | 
 | 	vpxor	(%rsi),%ymm0,%ymm0 | 
 | 	vmovdqu	%ymm0,(%rdi) | 
 | 	leaq	32(%rsi),%rsi | 
 |  | 
 | 	addq	0+0(%rdi),%r10 | 
 | 	adcq	8+0(%rdi),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 | 	addq	0+16(%rdi),%r10 | 
 | 	adcq	8+16(%rdi),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 |  | 
 | 	leaq	32(%rdi),%rdi | 
 |  | 
 | 	vmovdqa	%ymm4,%ymm0 | 
 | 	vmovdqa	%ymm8,%ymm4 | 
 | 	vmovdqa	%ymm12,%ymm8 | 
 | 	vmovdqa	%ymm1,%ymm12 | 
 | 	vmovdqa	%ymm5,%ymm1 | 
 | 	vmovdqa	%ymm9,%ymm5 | 
 | 	vmovdqa	%ymm13,%ymm9 | 
 | 	vmovdqa	%ymm2,%ymm13 | 
 | 	vmovdqa	%ymm6,%ymm2 | 
 | 	jmp	.Lseal_avx2_short_loop | 
 | .Lseal_avx2_short_tail: | 
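// At most 31 bytes remain.  If at least 16, encrypt one block with the low
// lane of %ymm0, hash it, then shift the high lane down into %xmm0 for the
// final partial block.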
 | 	cmpq	$16,%rbx | 
 | 	jb	.Lseal_avx2_exit | 
 | 	subq	$16,%rbx | 
 | 	vpxor	(%rsi),%xmm0,%xmm3 | 
 | 	vmovdqu	%xmm3,(%rdi) | 
 | 	leaq	16(%rsi),%rsi | 
 | 	addq	0+0(%rdi),%r10 | 
 | 	adcq	8+0(%rdi),%r11 | 
 | 	adcq	$1,%r12 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	movq	%rax,%r15 | 
 | 	mulq	%r10 | 
 | 	movq	%rax,%r13 | 
 | 	movq	%rdx,%r14 | 
 | 	movq	0+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	imulq	%r12,%r15 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	%rdx,%r15 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	movq	%rax,%r9 | 
 | 	mulq	%r10 | 
 | 	addq	%rax,%r14 | 
 | 	adcq	$0,%rdx | 
 | 	movq	%rdx,%r10 | 
 | 	movq	8+0+0(%rbp),%rax | 
 | 	mulq	%r11 | 
 | 	addq	%rax,%r15 | 
 | 	adcq	$0,%rdx | 
 | 	imulq	%r12,%r9 | 
 | 	addq	%r10,%r15 | 
 | 	adcq	%rdx,%r9 | 
 | 	movq	%r13,%r10 | 
 | 	movq	%r14,%r11 | 
 | 	movq	%r15,%r12 | 
 | 	andq	$3,%r12 | 
 | 	movq	%r15,%r13 | 
 | 	andq	$-4,%r13 | 
 | 	movq	%r9,%r14 | 
 | 	shrdq	$2,%r9,%r15 | 
 | 	shrq	$2,%r9 | 
 | 	addq	%r13,%r15 | 
 | 	adcq	%r14,%r9 | 
 | 	addq	%r15,%r10 | 
 | 	adcq	%r9,%r11 | 
 | 	adcq	$0,%r12 | 
 |  | 
 | 	leaq	16(%rdi),%rdi | 
 | 	vextracti128	$1,%ymm0,%xmm0 | 
 | .Lseal_avx2_exit: | 
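// Clear the upper YMM state and fall back to the SSE tail code, which
// finishes any remaining sub-16-byte input and the tag computation.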
 | 	vzeroupper | 
 | 	jmp	.Lseal_sse_tail_16 | 
 | .cfi_endproc	 | 
 | .size	chacha20_poly1305_seal_avx2, .-chacha20_poly1305_seal_avx2 | 
 | #endif |