| ; This file is generated from a similarly-named Perl script in the BoringSSL | 
 | ; source tree. Do not edit by hand. | 
 |  | 
 | %ifidn __OUTPUT_FORMAT__, win64 | 
 | default	rel | 
 | %define XMMWORD | 
 | %define YMMWORD | 
 | %define ZMMWORD | 
 | %define _CET_ENDBR | 
 |  | 
 | %ifdef BORINGSSL_PREFIX | 
 | %include "boringssl_prefix_symbols_nasm.inc" | 
 | %endif | 
 | section	.text code align=64 | 
 |  | 
 |  | 
 ; Read-only constant tables used by the ChaCha20 routines in this file.
 | section	.rdata rdata align=8 | 
 | ALIGN	64 | 
 ; All-zero dword vector. Not referenced in the portion of the file visible here.
 | $L$zero: | 
 | 	DD	0,0,0,0 | 
 ; {1,0,0,0}: added with paddd to the 16-byte block loaded from [r8] by the
 ; one-block-at-a-time routines, so only dword 0 (the 32-bit block counter)
 ; advances, by one, per 64-byte block.
 | $L$one: | 
 | 	DD	1,0,0,0 | 
 ; {0,1,2,3}: added to the broadcast counter dword in the 4-lane SSSE3 routine
 ; so that each lane gets its own consecutive counter value.
 | $L$inc: | 
 | 	DD	0,1,2,3 | 
 ; Counter step applied to all four lanes on every outer iteration of the
 ; 4-lane SSSE3 routine.
 | $L$four: | 
 | 	DD	4,4,4,4 | 
 ; Per-lane counter offsets added to the broadcast counter in the AVX2 routine.
 ; NOTE(review): the interleaved order presumably matches how that routine
 ; later de-interleaves its 128-bit halves; that code is not visible here.
 | $L$incy: | 
 | 	DD	0,2,4,6,1,3,5,7 | 
 ; Counter step applied to all eight lanes on every outer iteration of the
 ; AVX2 routine.
 | $L$eight: | 
 | 	DD	8,8,8,8,8,8,8,8 | 
 ; pshufb mask: within every dword, byte order {2,3,0,1}, i.e. rotate each
 ; 32-bit lane by 16 bits.
 | $L$rot16: | 
 | 	DB	0x2,0x3,0x0,0x1,0x6,0x7,0x4,0x5,0xa,0xb,0x8,0x9,0xe,0xf,0xc,0xd | 
 ; pshufb mask: within every dword, byte order {3,0,1,2}, i.e. rotate each
 ; 32-bit lane left by 8 bits (equivalently right by 24).
 | $L$rot24: | 
 | 	DB	0x3,0x0,0x1,0x2,0x7,0x4,0x5,0x6,0xb,0x8,0x9,0xa,0xf,0xc,0xd,0xe | 
 ; ASCII "expand 32-byte k" followed by a NUL byte: state words 0..3.
 | $L$sigma: | 
 | 	DB	101,120,112,97,110,100,32,51,50,45,98,121,116,101,32,107 | 
 | 	DB	0 | 
 | ALIGN	64 | 
 ; The four 16-dword tables below are not referenced in the portion of the file
 ; visible here. NOTE(review): their width suggests they are the 512-bit
 ; counterparts of $L$one/$L$inc/$L$four above -- confirm against their users.
 | $L$zeroz: | 
 | 	DD	0,0,0,0,1,0,0,0,2,0,0,0,3,0,0,0 | 
 | $L$fourz: | 
 | 	DD	4,0,0,0,4,0,0,0,4,0,0,0,4,0,0,0 | 
 | $L$incz: | 
 | 	DD	0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 | 
 | $L$sixteen: | 
 | 	DD	16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16 | 
 ; Embedded identification string (ASCII, NUL-terminated):
 ; "ChaCha20 for x86_64, CRYPTOGAMS by <appro@openssl.org>"
 | 	DB	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54 | 
 | 	DB	95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32 | 
 | 	DB	98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115 | 
 | 	DB	108,46,111,114,103,62,0 | 
 | section	.text | 
 |  | 
 ;-----------------------------------------------------------------------
 ; ChaCha20_ctr32_nohw
 ; XORs the input with a ChaCha20 keystream, 64 bytes per block, running the
 ; rounds in general-purpose registers (SSE2 is used only to move state rows).
 ;
 ; Win64 entry: arguments arrive in rcx, rdx, r8, r9 and [40+rsp] and are
 ; moved into rdi, rsi, rdx, rcx, r8 before the body. As used below:
 ;   rdi = output pointer
 ;   rsi = input pointer
 ;   rdx = byte count (kept in rbp)
 ;   rcx = pointer to 32 bytes loaded as state words 4..11
 ;         (presumably the key -- confirm against the caller)
 ;   r8  = pointer to 16 bytes loaded as state words 12..15; dword 0 acts as
 ;         the 32-bit block counter and is advanced by one per block
 ; The caller's rdi/rsi are stored at [8+rsp]/[16+rsp] on entry and reloaded
 ; just before ret; rbx, rbp, r12-r15 are pushed and restored.
 ;
 ; NOTE(review): the byte count is never tested for zero and no branch in this
 ; function targets $L$no_data. With a count of 0 the tail loop's dec/jnz would
 ; wrap around. Callers presumably guarantee a nonzero length -- confirm.
 ;-----------------------------------------------------------------------
 | global	ChaCha20_ctr32_nohw | 
 |  | 
 | ALIGN	64 | 
 | ChaCha20_ctr32_nohw: | 
 | 	mov	QWORD[8+rsp],rdi	;WIN64 prologue | 
 | 	mov	QWORD[16+rsp],rsi | 
 | 	mov	rax,rsp | 
 | $L$SEH_begin_ChaCha20_ctr32_nohw: | 
 ; Remap the Win64 argument registers (the fifth argument comes from the stack).
 | 	mov	rdi,rcx | 
 | 	mov	rsi,rdx | 
 | 	mov	rdx,r8 | 
 | 	mov	rcx,r9 | 
 | 	mov	r8,QWORD[40+rsp] | 
 |  | 
 |  | 
 |  | 
 | _CET_ENDBR | 
 | 	push	rbx | 
 |  | 
 | 	push	rbp | 
 |  | 
 | 	push	r12 | 
 |  | 
 | 	push	r13 | 
 |  | 
 | 	push	r14 | 
 |  | 
 | 	push	r15 | 
 |  | 
 ; Frame layout after this adjustment:
 ;   [ 0..15+rsp]  unused until the tail, where [0..63+rsp] receives a keystream block
 ;   [16..47+rsp]  state words 4..11 (words 8..11 double as spill slots in the rounds)
 ;   [48..63+rsp]  state words 12..15 (counter block)
 ;   [64..87+rsp]  saved byte count, input pointer, output pointer
 | 	sub	rsp,64+24 | 
 |  | 
 | $L$ctr32_body: | 
 |  | 
 |  | 
 ; xmm1/xmm2 = words 4..7 / 8..11, xmm3 = words 12..15, xmm4 = {1,0,0,0}.
 | 	movdqu	xmm1,XMMWORD[rcx] | 
 | 	movdqu	xmm2,XMMWORD[16+rcx] | 
 | 	movdqu	xmm3,XMMWORD[r8] | 
 | 	movdqa	xmm4,XMMWORD[$L$one] | 
 |  | 
 |  | 
 | 	movdqa	XMMWORD[16+rsp],xmm1 | 
 | 	movdqa	XMMWORD[32+rsp],xmm2 | 
 | 	movdqa	XMMWORD[48+rsp],xmm3 | 
 ; rbp = bytes remaining.
 | 	mov	rbp,rdx | 
 | 	jmp	NEAR $L$oop_outer | 
 |  | 
 | ALIGN	32 | 
 ; Per-block setup. Register map for the rounds:
 ;   eax,ebx,ecx,edx = words 0..3 (the "expand 32-byte k" constants)
 ;   r8d..r11d       = words 4..7
 ;   esi,edi         = two of words 8..11 at a time; the other pair is spilled
 ;                     to [32..47+rsp]
 ;   r12d..r15d      = words 12..15 (r12d taken from xmm3, the live counter copy)
 | $L$oop_outer: | 
 | 	mov	eax,0x61707865 | 
 | 	mov	ebx,0x3320646e | 
 | 	mov	ecx,0x79622d32 | 
 | 	mov	edx,0x6b206574 | 
 | 	mov	r8d,DWORD[16+rsp] | 
 | 	mov	r9d,DWORD[20+rsp] | 
 | 	mov	r10d,DWORD[24+rsp] | 
 | 	mov	r11d,DWORD[28+rsp] | 
 | 	movd	r12d,xmm3 | 
 | 	mov	r13d,DWORD[52+rsp] | 
 | 	mov	r14d,DWORD[56+rsp] | 
 | 	mov	r15d,DWORD[60+rsp] | 
 |  | 
 ; Park the byte count and both pointers so rbp/rsi/rdi can be used in the
 ; rounds; ebp becomes the iteration counter (10 iterations of two rounds each).
 ; The DB line is a hand-encoded "movq rsi,xmm2": rsi = words 8 and 9.
 ; Afterwards esi = word 8 and edi = word 9 (upper half of rsi shifted down).
 | 	mov	QWORD[((64+0))+rsp],rbp | 
 | 	mov	ebp,10 | 
 | 	mov	QWORD[((64+8))+rsp],rsi | 
 | DB	102,72,15,126,214 | 
 | 	mov	QWORD[((64+16))+rsp],rdi | 
 | 	mov	rdi,rsi | 
 | 	shr	rdi,32 | 
 | 	jmp	NEAR $L$oop | 
 |  | 
 | ALIGN	32 | 
 | $L$oop: | 
 ; Column round, quarter-rounds on words (0,4,8,12) and (1,5,9,13), interleaved.
 ; Each quarter-round is add/xor/rol with rotations 16, 12, 8, 7.
 | 	add	eax,r8d | 
 | 	xor	r12d,eax | 
 | 	rol	r12d,16 | 
 | 	add	ebx,r9d | 
 | 	xor	r13d,ebx | 
 | 	rol	r13d,16 | 
 | 	add	esi,r12d | 
 | 	xor	r8d,esi | 
 | 	rol	r8d,12 | 
 | 	add	edi,r13d | 
 | 	xor	r9d,edi | 
 | 	rol	r9d,12 | 
 | 	add	eax,r8d | 
 | 	xor	r12d,eax | 
 | 	rol	r12d,8 | 
 | 	add	ebx,r9d | 
 | 	xor	r13d,ebx | 
 | 	rol	r13d,8 | 
 | 	add	esi,r12d | 
 | 	xor	r8d,esi | 
 | 	rol	r8d,7 | 
 | 	add	edi,r13d | 
 | 	xor	r9d,edi | 
 | 	rol	r9d,7 | 
 ; Park words 8,9 and bring words 10,11 into esi/edi.
 | 	mov	DWORD[32+rsp],esi | 
 | 	mov	DWORD[36+rsp],edi | 
 | 	mov	esi,DWORD[40+rsp] | 
 | 	mov	edi,DWORD[44+rsp] | 
 ; Column round, quarter-rounds on words (2,6,10,14) and (3,7,11,15).
 | 	add	ecx,r10d | 
 | 	xor	r14d,ecx | 
 | 	rol	r14d,16 | 
 | 	add	edx,r11d | 
 | 	xor	r15d,edx | 
 | 	rol	r15d,16 | 
 | 	add	esi,r14d | 
 | 	xor	r10d,esi | 
 | 	rol	r10d,12 | 
 | 	add	edi,r15d | 
 | 	xor	r11d,edi | 
 | 	rol	r11d,12 | 
 | 	add	ecx,r10d | 
 | 	xor	r14d,ecx | 
 | 	rol	r14d,8 | 
 | 	add	edx,r11d | 
 | 	xor	r15d,edx | 
 | 	rol	r15d,8 | 
 | 	add	esi,r14d | 
 | 	xor	r10d,esi | 
 | 	rol	r10d,7 | 
 | 	add	edi,r15d | 
 | 	xor	r11d,edi | 
 | 	rol	r11d,7 | 
 ; Diagonal round, quarter-rounds on words (0,5,10,15) and (1,6,11,12);
 ; esi/edi still hold words 10,11.
 | 	add	eax,r9d | 
 | 	xor	r15d,eax | 
 | 	rol	r15d,16 | 
 | 	add	ebx,r10d | 
 | 	xor	r12d,ebx | 
 | 	rol	r12d,16 | 
 | 	add	esi,r15d | 
 | 	xor	r9d,esi | 
 | 	rol	r9d,12 | 
 | 	add	edi,r12d | 
 | 	xor	r10d,edi | 
 | 	rol	r10d,12 | 
 | 	add	eax,r9d | 
 | 	xor	r15d,eax | 
 | 	rol	r15d,8 | 
 | 	add	ebx,r10d | 
 | 	xor	r12d,ebx | 
 | 	rol	r12d,8 | 
 | 	add	esi,r15d | 
 | 	xor	r9d,esi | 
 | 	rol	r9d,7 | 
 | 	add	edi,r12d | 
 | 	xor	r10d,edi | 
 | 	rol	r10d,7 | 
 ; Park words 10,11 and bring words 8,9 back into esi/edi.
 | 	mov	DWORD[40+rsp],esi | 
 | 	mov	DWORD[44+rsp],edi | 
 | 	mov	esi,DWORD[32+rsp] | 
 | 	mov	edi,DWORD[36+rsp] | 
 ; Diagonal round, quarter-rounds on words (2,7,8,13) and (3,4,9,14).
 | 	add	ecx,r11d | 
 | 	xor	r13d,ecx | 
 | 	rol	r13d,16 | 
 | 	add	edx,r8d | 
 | 	xor	r14d,edx | 
 | 	rol	r14d,16 | 
 | 	add	esi,r13d | 
 | 	xor	r11d,esi | 
 | 	rol	r11d,12 | 
 | 	add	edi,r14d | 
 | 	xor	r8d,edi | 
 | 	rol	r8d,12 | 
 | 	add	ecx,r11d | 
 | 	xor	r13d,ecx | 
 | 	rol	r13d,8 | 
 | 	add	edx,r8d | 
 | 	xor	r14d,edx | 
 | 	rol	r14d,8 | 
 | 	add	esi,r13d | 
 | 	xor	r11d,esi | 
 | 	rol	r11d,7 | 
 | 	add	edi,r14d | 
 | 	xor	r8d,edi | 
 | 	rol	r8d,7 | 
 | 	dec	ebp | 
 | 	jnz	NEAR $L$oop | 
 ; Rounds done. Flush words 8,9 so [32..47+rsp] holds the working words 8..11,
 ; reload the byte count and pointers, set xmm1 = input words 8..11, and advance
 ; the register copy of the counter block (xmm3) for the next block.
 | 	mov	DWORD[36+rsp],edi | 
 | 	mov	DWORD[32+rsp],esi | 
 | 	mov	rbp,QWORD[64+rsp] | 
 | 	movdqa	xmm1,xmm2 | 
 | 	mov	rsi,QWORD[((64+8))+rsp] | 
 | 	paddd	xmm3,xmm4 | 
 | 	mov	rdi,QWORD[((64+16))+rsp] | 
 |  | 
 ; Feed-forward: add the input state to the working state. [48+rsp] still holds
 ; this block's counter; words 8..11 are summed in xmm1.
 | 	add	eax,0x61707865 | 
 | 	add	ebx,0x3320646e | 
 | 	add	ecx,0x79622d32 | 
 | 	add	edx,0x6b206574 | 
 | 	add	r8d,DWORD[16+rsp] | 
 | 	add	r9d,DWORD[20+rsp] | 
 | 	add	r10d,DWORD[24+rsp] | 
 | 	add	r11d,DWORD[28+rsp] | 
 | 	add	r12d,DWORD[48+rsp] | 
 | 	add	r13d,DWORD[52+rsp] | 
 | 	add	r14d,DWORD[56+rsp] | 
 | 	add	r15d,DWORD[60+rsp] | 
 | 	paddd	xmm1,XMMWORD[32+rsp] | 
 |  | 
 ; Fewer than 64 bytes left: handle the partial block bytewise.
 | 	cmp	rbp,64 | 
 | 	jb	NEAR $L$tail | 
 |  | 
 ; Full block: XOR the 16 keystream words with 64 input bytes
 ; (words 8..11 go through xmm0/xmm1).
 | 	xor	eax,DWORD[rsi] | 
 | 	xor	ebx,DWORD[4+rsi] | 
 | 	xor	ecx,DWORD[8+rsi] | 
 | 	xor	edx,DWORD[12+rsi] | 
 | 	xor	r8d,DWORD[16+rsi] | 
 | 	xor	r9d,DWORD[20+rsi] | 
 | 	xor	r10d,DWORD[24+rsi] | 
 | 	xor	r11d,DWORD[28+rsi] | 
 | 	movdqu	xmm0,XMMWORD[32+rsi] | 
 | 	xor	r12d,DWORD[48+rsi] | 
 | 	xor	r13d,DWORD[52+rsi] | 
 | 	xor	r14d,DWORD[56+rsi] | 
 | 	xor	r15d,DWORD[60+rsi] | 
 | 	lea	rsi,[64+rsi] | 
 | 	pxor	xmm0,xmm1 | 
 |  | 
 ; Restore input words 8..11 (their slots were used as spill space) and store
 ; the advanced counter dword for the next block's feed-forward.
 | 	movdqa	XMMWORD[32+rsp],xmm2 | 
 | 	movd	DWORD[48+rsp],xmm3 | 
 |  | 
 ; Write the 64 output bytes.
 | 	mov	DWORD[rdi],eax | 
 | 	mov	DWORD[4+rdi],ebx | 
 | 	mov	DWORD[8+rdi],ecx | 
 | 	mov	DWORD[12+rdi],edx | 
 | 	mov	DWORD[16+rdi],r8d | 
 | 	mov	DWORD[20+rdi],r9d | 
 | 	mov	DWORD[24+rdi],r10d | 
 | 	mov	DWORD[28+rdi],r11d | 
 | 	movdqu	XMMWORD[32+rdi],xmm0 | 
 | 	mov	DWORD[48+rdi],r12d | 
 | 	mov	DWORD[52+rdi],r13d | 
 | 	mov	DWORD[56+rdi],r14d | 
 | 	mov	DWORD[60+rdi],r15d | 
 | 	lea	rdi,[64+rdi] | 
 |  | 
 | 	sub	rbp,64 | 
 | 	jnz	NEAR $L$oop_outer | 
 |  | 
 | 	jmp	NEAR $L$done | 
 |  | 
 | ALIGN	16 | 
 ; Partial final block: spill the whole 64-byte keystream block to [0..63+rsp]
 ; (overwriting the saved input state, which is no longer needed) and XOR it
 ; with the remaining rbp input bytes one byte at a time. rbx is the byte index.
 | $L$tail: | 
 | 	mov	DWORD[rsp],eax | 
 | 	mov	DWORD[4+rsp],ebx | 
 | 	xor	rbx,rbx | 
 | 	mov	DWORD[8+rsp],ecx | 
 | 	mov	DWORD[12+rsp],edx | 
 | 	mov	DWORD[16+rsp],r8d | 
 | 	mov	DWORD[20+rsp],r9d | 
 | 	mov	DWORD[24+rsp],r10d | 
 | 	mov	DWORD[28+rsp],r11d | 
 | 	movdqa	XMMWORD[32+rsp],xmm1 | 
 | 	mov	DWORD[48+rsp],r12d | 
 | 	mov	DWORD[52+rsp],r13d | 
 | 	mov	DWORD[56+rsp],r14d | 
 | 	mov	DWORD[60+rsp],r15d | 
 |  | 
 ; rbx is advanced with lea before the store, hence the -1 in the store address.
 | $L$oop_tail: | 
 | 	movzx	eax,BYTE[rbx*1+rsi] | 
 | 	movzx	edx,BYTE[rbx*1+rsp] | 
 | 	lea	rbx,[1+rbx] | 
 | 	xor	eax,edx | 
 | 	mov	BYTE[((-1))+rbx*1+rdi],al | 
 | 	dec	rbp | 
 | 	jnz	NEAR $L$oop_tail | 
 |  | 
 ; rsi = address just above the six pushed registers (frame size + 6*8);
 ; reload them relative to it and drop the frame.
 | $L$done: | 
 | 	lea	rsi,[((64+24+48))+rsp] | 
 | 	mov	r15,QWORD[((-48))+rsi] | 
 |  | 
 | 	mov	r14,QWORD[((-40))+rsi] | 
 |  | 
 | 	mov	r13,QWORD[((-32))+rsi] | 
 |  | 
 | 	mov	r12,QWORD[((-24))+rsi] | 
 |  | 
 | 	mov	rbp,QWORD[((-16))+rsi] | 
 |  | 
 | 	mov	rbx,QWORD[((-8))+rsi] | 
 |  | 
 | 	lea	rsp,[rsi] | 
 |  | 
 ; Reload the caller's rdi/rsi that the prologue stored at [8+rsp]/[16+rsp].
 | $L$no_data: | 
 | 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue | 
 | 	mov	rsi,QWORD[16+rsp] | 
 | 	ret | 
 |  | 
 | $L$SEH_end_ChaCha20_ctr32_nohw: | 
 ;-----------------------------------------------------------------------
 ; ChaCha20_ctr32_ssse3
 ; XORs the input with a ChaCha20 keystream, one 64-byte block per outer
 ; iteration, holding each 4-word state row in one xmm register:
 ;   xmm0 = words 0..3, xmm1 = words 4..7, xmm2 = words 8..11, xmm3 = words 12..15
 ;
 ; Win64 entry: arguments arrive in rcx, rdx, r8, r9 and [40+rsp] and are
 ; moved into rdi, rsi, rdx, rcx, r8. As used below:
 ;   rdi = output pointer, rsi = input pointer, rdx = byte count,
 ;   rcx = pointer to 32 bytes loaded as words 4..11
 ;         (presumably the key -- confirm against the caller),
 ;   r8  = pointer to 16 bytes loaded as words 12..15 (dword 0 = block counter)
 ; r9 keeps the entry rsp; xmm6/xmm7 are saved below it and restored on exit.
 ;
 ; NOTE(review): the byte count is never tested for zero; with a count of 0 the
 ; tail loop's dec/jnz would wrap around. Callers presumably guarantee a
 ; nonzero length -- confirm.
 ;-----------------------------------------------------------------------
 | global	ChaCha20_ctr32_ssse3 | 
 |  | 
 | ALIGN	32 | 
 | ChaCha20_ctr32_ssse3: | 
 | 	mov	QWORD[8+rsp],rdi	;WIN64 prologue | 
 | 	mov	QWORD[16+rsp],rsi | 
 | 	mov	rax,rsp | 
 | $L$SEH_begin_ChaCha20_ctr32_ssse3: | 
 | 	mov	rdi,rcx | 
 | 	mov	rsi,rdx | 
 | 	mov	rdx,r8 | 
 | 	mov	rcx,r9 | 
 | 	mov	r8,QWORD[40+rsp] | 
 |  | 
 |  | 
 |  | 
 | _CET_ENDBR | 
 | 	mov	r9,rsp | 
 |  | 
 ; Frame: [0..63+rsp] = the four input state rows; the 40 bytes directly below
 ; the entry rsp hold the saved xmm6 and xmm7.
 | 	sub	rsp,64+40 | 
 | 	movaps	XMMWORD[(-40)+r9],xmm6 | 
 | 	movaps	XMMWORD[(-24)+r9],xmm7 | 
 | $L$ssse3_body: | 
 ; Load the state rows and the two pshufb rotation masks
 ; (xmm6 = rotate-by-16 mask, xmm7 = rotate-left-by-8 mask).
 | 	movdqa	xmm0,XMMWORD[$L$sigma] | 
 | 	movdqu	xmm1,XMMWORD[rcx] | 
 | 	movdqu	xmm2,XMMWORD[16+rcx] | 
 | 	movdqu	xmm3,XMMWORD[r8] | 
 | 	movdqa	xmm6,XMMWORD[$L$rot16] | 
 | 	movdqa	xmm7,XMMWORD[$L$rot24] | 
 |  | 
 | 	movdqa	XMMWORD[rsp],xmm0 | 
 | 	movdqa	XMMWORD[16+rsp],xmm1 | 
 | 	movdqa	XMMWORD[32+rsp],xmm2 | 
 | 	movdqa	XMMWORD[48+rsp],xmm3 | 
 ; r8 is reused as the iteration counter: 10 iterations of two rounds each.
 | 	mov	r8,10 | 
 | 	jmp	NEAR $L$oop_ssse3 | 
 |  | 
 | ALIGN	32 | 
 ; Next block: reload the saved rows and add {1,0,0,0} to the counter row,
 ; writing the advanced row back to [48+rsp].
 | $L$oop_outer_ssse3: | 
 | 	movdqa	xmm3,XMMWORD[$L$one] | 
 | 	movdqa	xmm0,XMMWORD[rsp] | 
 | 	movdqa	xmm1,XMMWORD[16+rsp] | 
 | 	movdqa	xmm2,XMMWORD[32+rsp] | 
 | 	paddd	xmm3,XMMWORD[48+rsp] | 
 | 	mov	r8,10 | 
 | 	movdqa	XMMWORD[48+rsp],xmm3 | 
 | 	jmp	NEAR $L$oop_ssse3 | 
 |  | 
 | ALIGN	32 | 
 ; One iteration = column round + diagonal round, four quarter-rounds at once.
 ; "DB 102,15,56,0,222" encodes pshufb xmm3,xmm6 (rotate by 16) and
 ; "DB 102,15,56,0,223" encodes pshufb xmm3,xmm7 (rotate left by 8).
 ; The psrld/pslld/por triples rotate xmm1 left by 12 and by 7.
 | $L$oop_ssse3: | 
 | 	paddd	xmm0,xmm1 | 
 | 	pxor	xmm3,xmm0 | 
 | DB	102,15,56,0,222 | 
 | 	paddd	xmm2,xmm3 | 
 | 	pxor	xmm1,xmm2 | 
 | 	movdqa	xmm4,xmm1 | 
 | 	psrld	xmm1,20 | 
 | 	pslld	xmm4,12 | 
 | 	por	xmm1,xmm4 | 
 | 	paddd	xmm0,xmm1 | 
 | 	pxor	xmm3,xmm0 | 
 | DB	102,15,56,0,223 | 
 | 	paddd	xmm2,xmm3 | 
 | 	pxor	xmm1,xmm2 | 
 | 	movdqa	xmm4,xmm1 | 
 | 	psrld	xmm1,25 | 
 | 	pslld	xmm4,7 | 
 | 	por	xmm1,xmm4 | 
 ; Rotate the dwords of rows 2, 1 and 3 by 2, 1 and 3 positions so that the
 ; same column-wise code now operates on the diagonals.
 | 	pshufd	xmm2,xmm2,78 | 
 | 	pshufd	xmm1,xmm1,57 | 
 | 	pshufd	xmm3,xmm3,147 | 
 | 	nop | 
 | 	paddd	xmm0,xmm1 | 
 | 	pxor	xmm3,xmm0 | 
 | DB	102,15,56,0,222 | 
 | 	paddd	xmm2,xmm3 | 
 | 	pxor	xmm1,xmm2 | 
 | 	movdqa	xmm4,xmm1 | 
 | 	psrld	xmm1,20 | 
 | 	pslld	xmm4,12 | 
 | 	por	xmm1,xmm4 | 
 | 	paddd	xmm0,xmm1 | 
 | 	pxor	xmm3,xmm0 | 
 | DB	102,15,56,0,223 | 
 | 	paddd	xmm2,xmm3 | 
 | 	pxor	xmm1,xmm2 | 
 | 	movdqa	xmm4,xmm1 | 
 | 	psrld	xmm1,25 | 
 | 	pslld	xmm4,7 | 
 | 	por	xmm1,xmm4 | 
 ; Undo the diagonal rotation (rows 1 and 3 use the inverse immediates).
 | 	pshufd	xmm2,xmm2,78 | 
 | 	pshufd	xmm1,xmm1,147 | 
 | 	pshufd	xmm3,xmm3,57 | 
 | 	dec	r8 | 
 | 	jnz	NEAR $L$oop_ssse3 | 
 ; Feed-forward: add the saved input rows to get the keystream block.
 | 	paddd	xmm0,XMMWORD[rsp] | 
 | 	paddd	xmm1,XMMWORD[16+rsp] | 
 | 	paddd	xmm2,XMMWORD[32+rsp] | 
 | 	paddd	xmm3,XMMWORD[48+rsp] | 
 |  | 
 ; Fewer than 64 bytes left: handle the partial block bytewise.
 | 	cmp	rdx,64 | 
 | 	jb	NEAR $L$tail_ssse3 | 
 |  | 
 ; Full block: XOR 64 input bytes with the keystream and store them.
 | 	movdqu	xmm4,XMMWORD[rsi] | 
 | 	movdqu	xmm5,XMMWORD[16+rsi] | 
 | 	pxor	xmm0,xmm4 | 
 | 	movdqu	xmm4,XMMWORD[32+rsi] | 
 | 	pxor	xmm1,xmm5 | 
 | 	movdqu	xmm5,XMMWORD[48+rsi] | 
 | 	lea	rsi,[64+rsi] | 
 | 	pxor	xmm2,xmm4 | 
 | 	pxor	xmm3,xmm5 | 
 |  | 
 | 	movdqu	XMMWORD[rdi],xmm0 | 
 | 	movdqu	XMMWORD[16+rdi],xmm1 | 
 | 	movdqu	XMMWORD[32+rdi],xmm2 | 
 | 	movdqu	XMMWORD[48+rdi],xmm3 | 
 | 	lea	rdi,[64+rdi] | 
 |  | 
 | 	sub	rdx,64 | 
 | 	jnz	NEAR $L$oop_outer_ssse3 | 
 |  | 
 | 	jmp	NEAR $L$done_ssse3 | 
 |  | 
 | ALIGN	16 | 
 ; Partial final block: spill the keystream block over the saved state at
 ; [0..63+rsp] and XOR the remaining rdx bytes one at a time (r8 = byte index).
 | $L$tail_ssse3: | 
 | 	movdqa	XMMWORD[rsp],xmm0 | 
 | 	movdqa	XMMWORD[16+rsp],xmm1 | 
 | 	movdqa	XMMWORD[32+rsp],xmm2 | 
 | 	movdqa	XMMWORD[48+rsp],xmm3 | 
 | 	xor	r8,r8 | 
 |  | 
 ; r8 is advanced with lea before the store, hence the -1 in the store address.
 | $L$oop_tail_ssse3: | 
 | 	movzx	eax,BYTE[r8*1+rsi] | 
 | 	movzx	ecx,BYTE[r8*1+rsp] | 
 | 	lea	r8,[1+r8] | 
 | 	xor	eax,ecx | 
 | 	mov	BYTE[((-1))+r8*1+rdi],al | 
 | 	dec	rdx | 
 | 	jnz	NEAR $L$oop_tail_ssse3 | 
 |  | 
 ; Restore xmm6/xmm7 and return rsp to its entry value (kept in r9).
 | $L$done_ssse3: | 
 | 	movaps	xmm6,XMMWORD[((-40))+r9] | 
 | 	movaps	xmm7,XMMWORD[((-24))+r9] | 
 | 	lea	rsp,[r9] | 
 |  | 
 | $L$ssse3_epilogue: | 
 | 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue | 
 | 	mov	rsi,QWORD[16+rsp] | 
 | 	ret | 
 |  | 
 | $L$SEH_end_ChaCha20_ctr32_ssse3: | 
 ;-----------------------------------------------------------------------
 ; ChaCha20_ctr32_ssse3_4x
 ; XORs the input with a ChaCha20 keystream, computing four 64-byte blocks per
 ; outer iteration. Each xmm register holds ONE state word for four blocks
 ; (one block per 32-bit lane):
 ;   xmm8..xmm11  = words 0..3      xmm12..xmm15 = words 4..7
 ;   xmm4,xmm5    = two of words 8..11 (the other two are spilled to the stack)
 ;   xmm0..xmm3   = words 12..15    xmm6,xmm7    = temporaries / pshufb masks
 ;
 ; Win64 entry: arguments arrive in rcx, rdx, r8, r9 and [40+rsp] and are
 ; moved into rdi, rsi, rdx, rcx, r8. As used below:
 ;   rdi = output pointer, rsi = input pointer, rdx = byte count,
 ;   rcx = pointer to 32 bytes loaded as words 4..11
 ;         (presumably the key -- confirm against the caller),
 ;   r8  = pointer to 16 bytes loaded as words 12..15 (dword 0 = block counter)
 ; r9 keeps the entry rsp; xmm6..xmm15 are saved below it and restored on exit.
 ;
 ; Frame (offsets from rsp; rcx is later set to rsp+256 and the upper slots are
 ; addressed as (N-256)+rcx):
 ;   [  0.. 63]  scratch: spill slots during the rounds, then transposed rows /
 ;               the partial keystream block for the tail
 ;   [ 64..127]  words 0..3, each broadcast to four lanes
 ;   [128..191]  words 4..7, broadcast
 ;   [192..255]  words 8..11, broadcast
 ;   [256..319]  words 12..15; [256] holds the four per-lane counters
 ;
 ; NOTE(review): the byte count is never tested for zero; with a count of 0 the
 ; tail loop's dec/jnz would wrap around. Callers presumably guarantee a
 ; nonzero length -- confirm.
 ;-----------------------------------------------------------------------
 | global	ChaCha20_ctr32_ssse3_4x | 
 |  | 
 | ALIGN	32 | 
 | ChaCha20_ctr32_ssse3_4x: | 
 | 	mov	QWORD[8+rsp],rdi	;WIN64 prologue | 
 | 	mov	QWORD[16+rsp],rsi | 
 | 	mov	rax,rsp | 
 | $L$SEH_begin_ChaCha20_ctr32_ssse3_4x: | 
 | 	mov	rdi,rcx | 
 | 	mov	rsi,rdx | 
 | 	mov	rdx,r8 | 
 | 	mov	rcx,r9 | 
 | 	mov	r8,QWORD[40+rsp] | 
 |  | 
 |  | 
 |  | 
 | _CET_ENDBR | 
 | 	mov	r9,rsp | 
 |  | 
 ; 0x140 bytes of working storage plus 168 bytes holding the saved xmm6..xmm15.
 | 	sub	rsp,0x140+168 | 
 | 	movaps	XMMWORD[(-168)+r9],xmm6 | 
 | 	movaps	XMMWORD[(-152)+r9],xmm7 | 
 | 	movaps	XMMWORD[(-136)+r9],xmm8 | 
 | 	movaps	XMMWORD[(-120)+r9],xmm9 | 
 | 	movaps	XMMWORD[(-104)+r9],xmm10 | 
 | 	movaps	XMMWORD[(-88)+r9],xmm11 | 
 | 	movaps	XMMWORD[(-72)+r9],xmm12 | 
 | 	movaps	XMMWORD[(-56)+r9],xmm13 | 
 | 	movaps	XMMWORD[(-40)+r9],xmm14 | 
 | 	movaps	XMMWORD[(-24)+r9],xmm15 | 
 | $L$4x_body: | 
 ; Load the four state rows, then repoint rcx at rsp+256 (frame bias pointer)
 ; and r10/r11 at the rotate-by-16 / rotate-left-by-8 pshufb masks.
 | 	movdqa	xmm11,XMMWORD[$L$sigma] | 
 | 	movdqu	xmm15,XMMWORD[rcx] | 
 | 	movdqu	xmm7,XMMWORD[16+rcx] | 
 | 	movdqu	xmm3,XMMWORD[r8] | 
 | 	lea	rcx,[256+rsp] | 
 | 	lea	r10,[$L$rot16] | 
 | 	lea	r11,[$L$rot24] | 
 |  | 
 ; Broadcast words 0..3 to all four lanes and save them.
 | 	pshufd	xmm8,xmm11,0x00 | 
 | 	pshufd	xmm9,xmm11,0x55 | 
 | 	movdqa	XMMWORD[64+rsp],xmm8 | 
 | 	pshufd	xmm10,xmm11,0xaa | 
 | 	movdqa	XMMWORD[80+rsp],xmm9 | 
 | 	pshufd	xmm11,xmm11,0xff | 
 | 	movdqa	XMMWORD[96+rsp],xmm10 | 
 | 	movdqa	XMMWORD[112+rsp],xmm11 | 
 |  | 
 ; Broadcast words 4..7.
 | 	pshufd	xmm12,xmm15,0x00 | 
 | 	pshufd	xmm13,xmm15,0x55 | 
 | 	movdqa	XMMWORD[(128-256)+rcx],xmm12 | 
 | 	pshufd	xmm14,xmm15,0xaa | 
 | 	movdqa	XMMWORD[(144-256)+rcx],xmm13 | 
 | 	pshufd	xmm15,xmm15,0xff | 
 | 	movdqa	XMMWORD[(160-256)+rcx],xmm14 | 
 | 	movdqa	XMMWORD[(176-256)+rcx],xmm15 | 
 |  | 
 ; Broadcast words 8..11.
 | 	pshufd	xmm4,xmm7,0x00 | 
 | 	pshufd	xmm5,xmm7,0x55 | 
 | 	movdqa	XMMWORD[(192-256)+rcx],xmm4 | 
 | 	pshufd	xmm6,xmm7,0xaa | 
 | 	movdqa	XMMWORD[(208-256)+rcx],xmm5 | 
 | 	pshufd	xmm7,xmm7,0xff | 
 | 	movdqa	XMMWORD[(224-256)+rcx],xmm6 | 
 | 	movdqa	XMMWORD[(240-256)+rcx],xmm7 | 
 |  | 
 ; Broadcast words 12..15; the counter lanes get +0,+1,+2,+3. The counter
 ; vector itself is stored at $L$oop_enter4x, not here.
 | 	pshufd	xmm0,xmm3,0x00 | 
 | 	pshufd	xmm1,xmm3,0x55 | 
 | 	paddd	xmm0,XMMWORD[$L$inc] | 
 | 	pshufd	xmm2,xmm3,0xaa | 
 | 	movdqa	XMMWORD[(272-256)+rcx],xmm1 | 
 | 	pshufd	xmm3,xmm3,0xff | 
 | 	movdqa	XMMWORD[(288-256)+rcx],xmm2 | 
 | 	movdqa	XMMWORD[(304-256)+rcx],xmm3 | 
 |  | 
 | 	jmp	NEAR $L$oop_enter4x | 
 |  | 
 | ALIGN	32 | 
 ; Next group of four blocks: reload all sixteen broadcast words and advance
 ; every lane's counter by 4.
 | $L$oop_outer4x: | 
 | 	movdqa	xmm8,XMMWORD[64+rsp] | 
 | 	movdqa	xmm9,XMMWORD[80+rsp] | 
 | 	movdqa	xmm10,XMMWORD[96+rsp] | 
 | 	movdqa	xmm11,XMMWORD[112+rsp] | 
 | 	movdqa	xmm12,XMMWORD[((128-256))+rcx] | 
 | 	movdqa	xmm13,XMMWORD[((144-256))+rcx] | 
 | 	movdqa	xmm14,XMMWORD[((160-256))+rcx] | 
 | 	movdqa	xmm15,XMMWORD[((176-256))+rcx] | 
 | 	movdqa	xmm4,XMMWORD[((192-256))+rcx] | 
 | 	movdqa	xmm5,XMMWORD[((208-256))+rcx] | 
 | 	movdqa	xmm6,XMMWORD[((224-256))+rcx] | 
 | 	movdqa	xmm7,XMMWORD[((240-256))+rcx] | 
 | 	movdqa	xmm0,XMMWORD[((256-256))+rcx] | 
 | 	movdqa	xmm1,XMMWORD[((272-256))+rcx] | 
 | 	movdqa	xmm2,XMMWORD[((288-256))+rcx] | 
 | 	movdqa	xmm3,XMMWORD[((304-256))+rcx] | 
 | 	paddd	xmm0,XMMWORD[$L$four] | 
 |  | 
 ; Spill words 10,11 (xmm6,xmm7) to [32+rsp],[48+rsp] so xmm6/xmm7 can serve
 ; as temporaries, preload the rotate-by-16 mask into xmm7, save this group's
 ; counters, and set eax = 10 iterations of two rounds each.
 | $L$oop_enter4x: | 
 | 	movdqa	XMMWORD[32+rsp],xmm6 | 
 | 	movdqa	XMMWORD[48+rsp],xmm7 | 
 | 	movdqa	xmm7,XMMWORD[r10] | 
 | 	mov	eax,10 | 
 | 	movdqa	XMMWORD[(256-256)+rcx],xmm0 | 
 | 	jmp	NEAR $L$oop4x | 
 |  | 
 | ALIGN	32 | 
 ; Each "DB 102,15,56,0,NN" below is a hand-encoded pshufb whose ModRM byte NN
 ; selects destination xmm0..xmm3 and source xmm7 (rotate-by-16 mask at that
 ; point) or xmm6 (rotate-left-by-8 mask at that point). The pslld/psrld/por
 ; triples implement the rotations by 12 and by 7.
 | $L$oop4x: | 
 ; Column round, quarter-rounds on words (0,4,8,12) and (1,5,9,13).
 | 	paddd	xmm8,xmm12 | 
 | 	paddd	xmm9,xmm13 | 
 | 	pxor	xmm0,xmm8 | 
 | 	pxor	xmm1,xmm9 | 
 | DB	102,15,56,0,199 | 
 | DB	102,15,56,0,207 | 
 | 	paddd	xmm4,xmm0 | 
 | 	paddd	xmm5,xmm1 | 
 | 	pxor	xmm12,xmm4 | 
 | 	pxor	xmm13,xmm5 | 
 | 	movdqa	xmm6,xmm12 | 
 | 	pslld	xmm12,12 | 
 | 	psrld	xmm6,20 | 
 | 	movdqa	xmm7,xmm13 | 
 | 	pslld	xmm13,12 | 
 | 	por	xmm12,xmm6 | 
 | 	psrld	xmm7,20 | 
 | 	movdqa	xmm6,XMMWORD[r11] | 
 | 	por	xmm13,xmm7 | 
 | 	paddd	xmm8,xmm12 | 
 | 	paddd	xmm9,xmm13 | 
 | 	pxor	xmm0,xmm8 | 
 | 	pxor	xmm1,xmm9 | 
 | DB	102,15,56,0,198 | 
 | DB	102,15,56,0,206 | 
 | 	paddd	xmm4,xmm0 | 
 | 	paddd	xmm5,xmm1 | 
 | 	pxor	xmm12,xmm4 | 
 | 	pxor	xmm13,xmm5 | 
 | 	movdqa	xmm7,xmm12 | 
 | 	pslld	xmm12,7 | 
 | 	psrld	xmm7,25 | 
 | 	movdqa	xmm6,xmm13 | 
 | 	pslld	xmm13,7 | 
 | 	por	xmm12,xmm7 | 
 | 	psrld	xmm6,25 | 
 | 	movdqa	xmm7,XMMWORD[r10] | 
 | 	por	xmm13,xmm6 | 
 ; Park words 8,9 in [0+rsp],[16+rsp] and bring words 10,11 into xmm4,xmm5.
 | 	movdqa	XMMWORD[rsp],xmm4 | 
 | 	movdqa	XMMWORD[16+rsp],xmm5 | 
 | 	movdqa	xmm4,XMMWORD[32+rsp] | 
 | 	movdqa	xmm5,XMMWORD[48+rsp] | 
 ; Column round, quarter-rounds on words (2,6,10,14) and (3,7,11,15).
 | 	paddd	xmm10,xmm14 | 
 | 	paddd	xmm11,xmm15 | 
 | 	pxor	xmm2,xmm10 | 
 | 	pxor	xmm3,xmm11 | 
 | DB	102,15,56,0,215 | 
 | DB	102,15,56,0,223 | 
 | 	paddd	xmm4,xmm2 | 
 | 	paddd	xmm5,xmm3 | 
 | 	pxor	xmm14,xmm4 | 
 | 	pxor	xmm15,xmm5 | 
 | 	movdqa	xmm6,xmm14 | 
 | 	pslld	xmm14,12 | 
 | 	psrld	xmm6,20 | 
 | 	movdqa	xmm7,xmm15 | 
 | 	pslld	xmm15,12 | 
 | 	por	xmm14,xmm6 | 
 | 	psrld	xmm7,20 | 
 | 	movdqa	xmm6,XMMWORD[r11] | 
 | 	por	xmm15,xmm7 | 
 | 	paddd	xmm10,xmm14 | 
 | 	paddd	xmm11,xmm15 | 
 | 	pxor	xmm2,xmm10 | 
 | 	pxor	xmm3,xmm11 | 
 | DB	102,15,56,0,214 | 
 | DB	102,15,56,0,222 | 
 | 	paddd	xmm4,xmm2 | 
 | 	paddd	xmm5,xmm3 | 
 | 	pxor	xmm14,xmm4 | 
 | 	pxor	xmm15,xmm5 | 
 | 	movdqa	xmm7,xmm14 | 
 | 	pslld	xmm14,7 | 
 | 	psrld	xmm7,25 | 
 | 	movdqa	xmm6,xmm15 | 
 | 	pslld	xmm15,7 | 
 | 	por	xmm14,xmm7 | 
 | 	psrld	xmm6,25 | 
 | 	movdqa	xmm7,XMMWORD[r10] | 
 | 	por	xmm15,xmm6 | 
 ; Diagonal round, quarter-rounds on words (0,5,10,15) and (1,6,11,12);
 ; xmm4,xmm5 still hold words 10,11.
 | 	paddd	xmm8,xmm13 | 
 | 	paddd	xmm9,xmm14 | 
 | 	pxor	xmm3,xmm8 | 
 | 	pxor	xmm0,xmm9 | 
 | DB	102,15,56,0,223 | 
 | DB	102,15,56,0,199 | 
 | 	paddd	xmm4,xmm3 | 
 | 	paddd	xmm5,xmm0 | 
 | 	pxor	xmm13,xmm4 | 
 | 	pxor	xmm14,xmm5 | 
 | 	movdqa	xmm6,xmm13 | 
 | 	pslld	xmm13,12 | 
 | 	psrld	xmm6,20 | 
 | 	movdqa	xmm7,xmm14 | 
 | 	pslld	xmm14,12 | 
 | 	por	xmm13,xmm6 | 
 | 	psrld	xmm7,20 | 
 | 	movdqa	xmm6,XMMWORD[r11] | 
 | 	por	xmm14,xmm7 | 
 | 	paddd	xmm8,xmm13 | 
 | 	paddd	xmm9,xmm14 | 
 | 	pxor	xmm3,xmm8 | 
 | 	pxor	xmm0,xmm9 | 
 | DB	102,15,56,0,222 | 
 | DB	102,15,56,0,198 | 
 | 	paddd	xmm4,xmm3 | 
 | 	paddd	xmm5,xmm0 | 
 | 	pxor	xmm13,xmm4 | 
 | 	pxor	xmm14,xmm5 | 
 | 	movdqa	xmm7,xmm13 | 
 | 	pslld	xmm13,7 | 
 | 	psrld	xmm7,25 | 
 | 	movdqa	xmm6,xmm14 | 
 | 	pslld	xmm14,7 | 
 | 	por	xmm13,xmm7 | 
 | 	psrld	xmm6,25 | 
 | 	movdqa	xmm7,XMMWORD[r10] | 
 | 	por	xmm14,xmm6 | 
 ; Park words 10,11 in [32+rsp],[48+rsp] and bring words 8,9 back.
 | 	movdqa	XMMWORD[32+rsp],xmm4 | 
 | 	movdqa	XMMWORD[48+rsp],xmm5 | 
 | 	movdqa	xmm4,XMMWORD[rsp] | 
 | 	movdqa	xmm5,XMMWORD[16+rsp] | 
 ; Diagonal round, quarter-rounds on words (2,7,8,13) and (3,4,9,14).
 | 	paddd	xmm10,xmm15 | 
 | 	paddd	xmm11,xmm12 | 
 | 	pxor	xmm1,xmm10 | 
 | 	pxor	xmm2,xmm11 | 
 | DB	102,15,56,0,207 | 
 | DB	102,15,56,0,215 | 
 | 	paddd	xmm4,xmm1 | 
 | 	paddd	xmm5,xmm2 | 
 | 	pxor	xmm15,xmm4 | 
 | 	pxor	xmm12,xmm5 | 
 | 	movdqa	xmm6,xmm15 | 
 | 	pslld	xmm15,12 | 
 | 	psrld	xmm6,20 | 
 | 	movdqa	xmm7,xmm12 | 
 | 	pslld	xmm12,12 | 
 | 	por	xmm15,xmm6 | 
 | 	psrld	xmm7,20 | 
 | 	movdqa	xmm6,XMMWORD[r11] | 
 | 	por	xmm12,xmm7 | 
 | 	paddd	xmm10,xmm15 | 
 | 	paddd	xmm11,xmm12 | 
 | 	pxor	xmm1,xmm10 | 
 | 	pxor	xmm2,xmm11 | 
 | DB	102,15,56,0,206 | 
 | DB	102,15,56,0,214 | 
 | 	paddd	xmm4,xmm1 | 
 | 	paddd	xmm5,xmm2 | 
 | 	pxor	xmm15,xmm4 | 
 | 	pxor	xmm12,xmm5 | 
 | 	movdqa	xmm7,xmm15 | 
 | 	pslld	xmm15,7 | 
 | 	psrld	xmm7,25 | 
 | 	movdqa	xmm6,xmm12 | 
 | 	pslld	xmm12,7 | 
 | 	por	xmm15,xmm7 | 
 | 	psrld	xmm6,25 | 
 | 	movdqa	xmm7,XMMWORD[r10] | 
 | 	por	xmm12,xmm6 | 
 | 	dec	eax | 
 | 	jnz	NEAR $L$oop4x | 
 |  | 
 ; Feed-forward for words 0..3, then transpose the 4x4 dword matrix so that
 ; xmm8, xmm9, xmm6, xmm11 hold words 0..3 of blocks 0, 1, 2, 3 respectively.
 | 	paddd	xmm8,XMMWORD[64+rsp] | 
 | 	paddd	xmm9,XMMWORD[80+rsp] | 
 | 	paddd	xmm10,XMMWORD[96+rsp] | 
 | 	paddd	xmm11,XMMWORD[112+rsp] | 
 |  | 
 | 	movdqa	xmm6,xmm8 | 
 | 	punpckldq	xmm8,xmm9 | 
 | 	movdqa	xmm7,xmm10 | 
 | 	punpckldq	xmm10,xmm11 | 
 | 	punpckhdq	xmm6,xmm9 | 
 | 	punpckhdq	xmm7,xmm11 | 
 | 	movdqa	xmm9,xmm8 | 
 | 	punpcklqdq	xmm8,xmm10 | 
 | 	movdqa	xmm11,xmm6 | 
 | 	punpcklqdq	xmm6,xmm7 | 
 | 	punpckhqdq	xmm9,xmm10 | 
 | 	punpckhqdq	xmm11,xmm7 | 
 ; Feed-forward for words 4..7.
 | 	paddd	xmm12,XMMWORD[((128-256))+rcx] | 
 | 	paddd	xmm13,XMMWORD[((144-256))+rcx] | 
 | 	paddd	xmm14,XMMWORD[((160-256))+rcx] | 
 | 	paddd	xmm15,XMMWORD[((176-256))+rcx] | 
 |  | 
 ; Save words 0..3 of blocks 0,1 to [0+rsp],[16+rsp] and pick up the spilled
 ; working words 10,11 into xmm8,xmm9.
 | 	movdqa	XMMWORD[rsp],xmm8 | 
 | 	movdqa	XMMWORD[16+rsp],xmm9 | 
 | 	movdqa	xmm8,XMMWORD[32+rsp] | 
 | 	movdqa	xmm9,XMMWORD[48+rsp] | 
 |  | 
 ; Transpose words 4..7: xmm12, xmm13, xmm10, xmm15 = blocks 0, 1, 2, 3.
 | 	movdqa	xmm10,xmm12 | 
 | 	punpckldq	xmm12,xmm13 | 
 | 	movdqa	xmm7,xmm14 | 
 | 	punpckldq	xmm14,xmm15 | 
 | 	punpckhdq	xmm10,xmm13 | 
 | 	punpckhdq	xmm7,xmm15 | 
 | 	movdqa	xmm13,xmm12 | 
 | 	punpcklqdq	xmm12,xmm14 | 
 | 	movdqa	xmm15,xmm10 | 
 | 	punpcklqdq	xmm10,xmm7 | 
 | 	punpckhqdq	xmm13,xmm14 | 
 | 	punpckhqdq	xmm15,xmm7 | 
 ; Feed-forward for words 8..11.
 | 	paddd	xmm4,XMMWORD[((192-256))+rcx] | 
 | 	paddd	xmm5,XMMWORD[((208-256))+rcx] | 
 | 	paddd	xmm8,XMMWORD[((224-256))+rcx] | 
 | 	paddd	xmm9,XMMWORD[((240-256))+rcx] | 
 |  | 
 ; Save words 0..3 of blocks 2,3 to [32+rsp],[48+rsp].
 | 	movdqa	XMMWORD[32+rsp],xmm6 | 
 | 	movdqa	XMMWORD[48+rsp],xmm11 | 
 |  | 
 ; Transpose words 8..11: xmm4, xmm5, xmm14, xmm9 = blocks 0, 1, 2, 3.
 | 	movdqa	xmm14,xmm4 | 
 | 	punpckldq	xmm4,xmm5 | 
 | 	movdqa	xmm7,xmm8 | 
 | 	punpckldq	xmm8,xmm9 | 
 | 	punpckhdq	xmm14,xmm5 | 
 | 	punpckhdq	xmm7,xmm9 | 
 | 	movdqa	xmm5,xmm4 | 
 | 	punpcklqdq	xmm4,xmm8 | 
 | 	movdqa	xmm9,xmm14 | 
 | 	punpcklqdq	xmm14,xmm7 | 
 | 	punpckhqdq	xmm5,xmm8 | 
 | 	punpckhqdq	xmm9,xmm7 | 
 ; Feed-forward for words 12..15 ([256+rsp] holds this group's counters).
 | 	paddd	xmm0,XMMWORD[((256-256))+rcx] | 
 | 	paddd	xmm1,XMMWORD[((272-256))+rcx] | 
 | 	paddd	xmm2,XMMWORD[((288-256))+rcx] | 
 | 	paddd	xmm3,XMMWORD[((304-256))+rcx] | 
 |  | 
 ; Transpose words 12..15: xmm0, xmm1, xmm8, xmm3 = blocks 0, 1, 2, 3.
 | 	movdqa	xmm8,xmm0 | 
 | 	punpckldq	xmm0,xmm1 | 
 | 	movdqa	xmm7,xmm2 | 
 | 	punpckldq	xmm2,xmm3 | 
 | 	punpckhdq	xmm8,xmm1 | 
 | 	punpckhdq	xmm7,xmm3 | 
 | 	movdqa	xmm1,xmm0 | 
 | 	punpcklqdq	xmm0,xmm2 | 
 | 	movdqa	xmm3,xmm8 | 
 | 	punpcklqdq	xmm8,xmm7 | 
 | 	punpckhqdq	xmm1,xmm2 | 
 | 	punpckhqdq	xmm3,xmm7 | 
 ; Keystream layout at this point (16 bytes each, in output order):
 ;   block 0: [ 0+rsp], xmm12, xmm4,  xmm0
 ;   block 1: [16+rsp], xmm13, xmm5,  xmm1
 ;   block 2: [32+rsp], xmm10, xmm14, xmm8
 ;   block 3: [48+rsp], xmm15, xmm9,  xmm3
 ; Fewer than 256 bytes left: go to the tail dispatcher.
 | 	cmp	rdx,64*4 | 
 | 	jb	NEAR $L$tail4x | 
 |  | 
 ; Full 256 bytes: XOR and store blocks 0..3, software-pipelining the loads of
 ; the next 64 input bytes with the stores of the previous block.
 | 	movdqu	xmm6,XMMWORD[rsi] | 
 | 	movdqu	xmm11,XMMWORD[16+rsi] | 
 | 	movdqu	xmm2,XMMWORD[32+rsi] | 
 | 	movdqu	xmm7,XMMWORD[48+rsi] | 
 | 	pxor	xmm6,XMMWORD[rsp] | 
 | 	pxor	xmm11,xmm12 | 
 | 	pxor	xmm2,xmm4 | 
 | 	pxor	xmm7,xmm0 | 
 |  | 
 | 	movdqu	XMMWORD[rdi],xmm6 | 
 | 	movdqu	xmm6,XMMWORD[64+rsi] | 
 | 	movdqu	XMMWORD[16+rdi],xmm11 | 
 | 	movdqu	xmm11,XMMWORD[80+rsi] | 
 | 	movdqu	XMMWORD[32+rdi],xmm2 | 
 | 	movdqu	xmm2,XMMWORD[96+rsi] | 
 | 	movdqu	XMMWORD[48+rdi],xmm7 | 
 | 	movdqu	xmm7,XMMWORD[112+rsi] | 
 | 	lea	rsi,[128+rsi] | 
 | 	pxor	xmm6,XMMWORD[16+rsp] | 
 | 	pxor	xmm11,xmm13 | 
 | 	pxor	xmm2,xmm5 | 
 | 	pxor	xmm7,xmm1 | 
 |  | 
 | 	movdqu	XMMWORD[64+rdi],xmm6 | 
 | 	movdqu	xmm6,XMMWORD[rsi] | 
 | 	movdqu	XMMWORD[80+rdi],xmm11 | 
 | 	movdqu	xmm11,XMMWORD[16+rsi] | 
 | 	movdqu	XMMWORD[96+rdi],xmm2 | 
 | 	movdqu	xmm2,XMMWORD[32+rsi] | 
 | 	movdqu	XMMWORD[112+rdi],xmm7 | 
 | 	lea	rdi,[128+rdi] | 
 | 	movdqu	xmm7,XMMWORD[48+rsi] | 
 | 	pxor	xmm6,XMMWORD[32+rsp] | 
 | 	pxor	xmm11,xmm10 | 
 | 	pxor	xmm2,xmm14 | 
 | 	pxor	xmm7,xmm8 | 
 |  | 
 | 	movdqu	XMMWORD[rdi],xmm6 | 
 | 	movdqu	xmm6,XMMWORD[64+rsi] | 
 | 	movdqu	XMMWORD[16+rdi],xmm11 | 
 | 	movdqu	xmm11,XMMWORD[80+rsi] | 
 | 	movdqu	XMMWORD[32+rdi],xmm2 | 
 | 	movdqu	xmm2,XMMWORD[96+rsi] | 
 | 	movdqu	XMMWORD[48+rdi],xmm7 | 
 | 	movdqu	xmm7,XMMWORD[112+rsi] | 
 | 	lea	rsi,[128+rsi] | 
 | 	pxor	xmm6,XMMWORD[48+rsp] | 
 | 	pxor	xmm11,xmm15 | 
 | 	pxor	xmm2,xmm9 | 
 | 	pxor	xmm7,xmm3 | 
 | 	movdqu	XMMWORD[64+rdi],xmm6 | 
 | 	movdqu	XMMWORD[80+rdi],xmm11 | 
 | 	movdqu	XMMWORD[96+rdi],xmm2 | 
 | 	movdqu	XMMWORD[112+rdi],xmm7 | 
 | 	lea	rdi,[128+rdi] | 
 |  | 
 | 	sub	rdx,64*4 | 
 | 	jnz	NEAR $L$oop_outer4x | 
 |  | 
 | 	jmp	NEAR $L$done4x | 
 |  | 
 ; Tail: fewer than 256 bytes remain. Dispatch on how many whole 64-byte
 ; blocks are left; each branch processes those whole blocks and then stages
 ; the next keystream block in [0..63+rsp] for the bytewise loop.
 | $L$tail4x: | 
 | 	cmp	rdx,192 | 
 | 	jae	NEAR $L$192_or_more4x | 
 | 	cmp	rdx,128 | 
 | 	jae	NEAR $L$128_or_more4x | 
 | 	cmp	rdx,64 | 
 | 	jae	NEAR $L$64_or_more4x | 
 |  | 
 |  | 
 ; Fewer than 64 bytes: complete block 0 in [0..63+rsp] ([0+rsp] already holds
 ; its first 16 bytes). r10 is reused as the byte index from here on.
 | 	xor	r10,r10 | 
 |  | 
 | 	movdqa	XMMWORD[16+rsp],xmm12 | 
 | 	movdqa	XMMWORD[32+rsp],xmm4 | 
 | 	movdqa	XMMWORD[48+rsp],xmm0 | 
 | 	jmp	NEAR $L$oop_tail4x | 
 |  | 
 | ALIGN	32 | 
 ; 64..127 bytes: emit block 0. The je below still sees the flags of
 ; "cmp rdx,64" in $L$tail4x (no instruction in between writes flags), so it
 ; is taken when exactly 64 bytes remained. Otherwise stage block 1.
 | $L$64_or_more4x: | 
 | 	movdqu	xmm6,XMMWORD[rsi] | 
 | 	movdqu	xmm11,XMMWORD[16+rsi] | 
 | 	movdqu	xmm2,XMMWORD[32+rsi] | 
 | 	movdqu	xmm7,XMMWORD[48+rsi] | 
 | 	pxor	xmm6,XMMWORD[rsp] | 
 | 	pxor	xmm11,xmm12 | 
 | 	pxor	xmm2,xmm4 | 
 | 	pxor	xmm7,xmm0 | 
 | 	movdqu	XMMWORD[rdi],xmm6 | 
 | 	movdqu	XMMWORD[16+rdi],xmm11 | 
 | 	movdqu	XMMWORD[32+rdi],xmm2 | 
 | 	movdqu	XMMWORD[48+rdi],xmm7 | 
 | 	je	NEAR $L$done4x | 
 |  | 
 | 	movdqa	xmm6,XMMWORD[16+rsp] | 
 | 	lea	rsi,[64+rsi] | 
 | 	xor	r10,r10 | 
 | 	movdqa	XMMWORD[rsp],xmm6 | 
 | 	movdqa	XMMWORD[16+rsp],xmm13 | 
 | 	lea	rdi,[64+rdi] | 
 | 	movdqa	XMMWORD[32+rsp],xmm5 | 
 | 	sub	rdx,64 | 
 | 	movdqa	XMMWORD[48+rsp],xmm1 | 
 | 	jmp	NEAR $L$oop_tail4x | 
 |  | 
 | ALIGN	32 | 
 ; 128..191 bytes: emit blocks 0 and 1. The je relies on the flags of
 ; "cmp rdx,128" (taken when exactly 128 bytes remained). Otherwise stage block 2.
 | $L$128_or_more4x: | 
 | 	movdqu	xmm6,XMMWORD[rsi] | 
 | 	movdqu	xmm11,XMMWORD[16+rsi] | 
 | 	movdqu	xmm2,XMMWORD[32+rsi] | 
 | 	movdqu	xmm7,XMMWORD[48+rsi] | 
 | 	pxor	xmm6,XMMWORD[rsp] | 
 | 	pxor	xmm11,xmm12 | 
 | 	pxor	xmm2,xmm4 | 
 | 	pxor	xmm7,xmm0 | 
 |  | 
 | 	movdqu	XMMWORD[rdi],xmm6 | 
 | 	movdqu	xmm6,XMMWORD[64+rsi] | 
 | 	movdqu	XMMWORD[16+rdi],xmm11 | 
 | 	movdqu	xmm11,XMMWORD[80+rsi] | 
 | 	movdqu	XMMWORD[32+rdi],xmm2 | 
 | 	movdqu	xmm2,XMMWORD[96+rsi] | 
 | 	movdqu	XMMWORD[48+rdi],xmm7 | 
 | 	movdqu	xmm7,XMMWORD[112+rsi] | 
 | 	pxor	xmm6,XMMWORD[16+rsp] | 
 | 	pxor	xmm11,xmm13 | 
 | 	pxor	xmm2,xmm5 | 
 | 	pxor	xmm7,xmm1 | 
 | 	movdqu	XMMWORD[64+rdi],xmm6 | 
 | 	movdqu	XMMWORD[80+rdi],xmm11 | 
 | 	movdqu	XMMWORD[96+rdi],xmm2 | 
 | 	movdqu	XMMWORD[112+rdi],xmm7 | 
 | 	je	NEAR $L$done4x | 
 |  | 
 | 	movdqa	xmm6,XMMWORD[32+rsp] | 
 | 	lea	rsi,[128+rsi] | 
 | 	xor	r10,r10 | 
 | 	movdqa	XMMWORD[rsp],xmm6 | 
 | 	movdqa	XMMWORD[16+rsp],xmm10 | 
 | 	lea	rdi,[128+rdi] | 
 | 	movdqa	XMMWORD[32+rsp],xmm14 | 
 | 	sub	rdx,128 | 
 | 	movdqa	XMMWORD[48+rsp],xmm8 | 
 | 	jmp	NEAR $L$oop_tail4x | 
 |  | 
 | ALIGN	32 | 
 ; 192..255 bytes: emit blocks 0, 1 and 2. The je relies on the flags of
 ; "cmp rdx,192" (taken when exactly 192 bytes remained). Otherwise stage
 ; block 3 and fall through into the bytewise loop.
 | $L$192_or_more4x: | 
 | 	movdqu	xmm6,XMMWORD[rsi] | 
 | 	movdqu	xmm11,XMMWORD[16+rsi] | 
 | 	movdqu	xmm2,XMMWORD[32+rsi] | 
 | 	movdqu	xmm7,XMMWORD[48+rsi] | 
 | 	pxor	xmm6,XMMWORD[rsp] | 
 | 	pxor	xmm11,xmm12 | 
 | 	pxor	xmm2,xmm4 | 
 | 	pxor	xmm7,xmm0 | 
 |  | 
 | 	movdqu	XMMWORD[rdi],xmm6 | 
 | 	movdqu	xmm6,XMMWORD[64+rsi] | 
 | 	movdqu	XMMWORD[16+rdi],xmm11 | 
 | 	movdqu	xmm11,XMMWORD[80+rsi] | 
 | 	movdqu	XMMWORD[32+rdi],xmm2 | 
 | 	movdqu	xmm2,XMMWORD[96+rsi] | 
 | 	movdqu	XMMWORD[48+rdi],xmm7 | 
 | 	movdqu	xmm7,XMMWORD[112+rsi] | 
 | 	lea	rsi,[128+rsi] | 
 | 	pxor	xmm6,XMMWORD[16+rsp] | 
 | 	pxor	xmm11,xmm13 | 
 | 	pxor	xmm2,xmm5 | 
 | 	pxor	xmm7,xmm1 | 
 |  | 
 | 	movdqu	XMMWORD[64+rdi],xmm6 | 
 | 	movdqu	xmm6,XMMWORD[rsi] | 
 | 	movdqu	XMMWORD[80+rdi],xmm11 | 
 | 	movdqu	xmm11,XMMWORD[16+rsi] | 
 | 	movdqu	XMMWORD[96+rdi],xmm2 | 
 | 	movdqu	xmm2,XMMWORD[32+rsi] | 
 | 	movdqu	XMMWORD[112+rdi],xmm7 | 
 | 	lea	rdi,[128+rdi] | 
 | 	movdqu	xmm7,XMMWORD[48+rsi] | 
 | 	pxor	xmm6,XMMWORD[32+rsp] | 
 | 	pxor	xmm11,xmm10 | 
 | 	pxor	xmm2,xmm14 | 
 | 	pxor	xmm7,xmm8 | 
 | 	movdqu	XMMWORD[rdi],xmm6 | 
 | 	movdqu	XMMWORD[16+rdi],xmm11 | 
 | 	movdqu	XMMWORD[32+rdi],xmm2 | 
 | 	movdqu	XMMWORD[48+rdi],xmm7 | 
 | 	je	NEAR $L$done4x | 
 |  | 
 | 	movdqa	xmm6,XMMWORD[48+rsp] | 
 | 	lea	rsi,[64+rsi] | 
 | 	xor	r10,r10 | 
 | 	movdqa	XMMWORD[rsp],xmm6 | 
 | 	movdqa	XMMWORD[16+rsp],xmm15 | 
 | 	lea	rdi,[64+rdi] | 
 | 	movdqa	XMMWORD[32+rsp],xmm9 | 
 | 	sub	rdx,192 | 
 | 	movdqa	XMMWORD[48+rsp],xmm3 | 
 |  | 
 ; Bytewise XOR of the remaining rdx bytes against the keystream block staged
 ; at [0..63+rsp]. r10 is advanced with lea before the store, hence the -1.
 | $L$oop_tail4x: | 
 | 	movzx	eax,BYTE[r10*1+rsi] | 
 | 	movzx	ecx,BYTE[r10*1+rsp] | 
 | 	lea	r10,[1+r10] | 
 | 	xor	eax,ecx | 
 | 	mov	BYTE[((-1))+r10*1+rdi],al | 
 | 	dec	rdx | 
 | 	jnz	NEAR $L$oop_tail4x | 
 |  | 
 ; Restore xmm6..xmm15 and return rsp to its entry value (kept in r9).
 | $L$done4x: | 
 | 	movaps	xmm6,XMMWORD[((-168))+r9] | 
 | 	movaps	xmm7,XMMWORD[((-152))+r9] | 
 | 	movaps	xmm8,XMMWORD[((-136))+r9] | 
 | 	movaps	xmm9,XMMWORD[((-120))+r9] | 
 | 	movaps	xmm10,XMMWORD[((-104))+r9] | 
 | 	movaps	xmm11,XMMWORD[((-88))+r9] | 
 | 	movaps	xmm12,XMMWORD[((-72))+r9] | 
 | 	movaps	xmm13,XMMWORD[((-56))+r9] | 
 | 	movaps	xmm14,XMMWORD[((-40))+r9] | 
 | 	movaps	xmm15,XMMWORD[((-24))+r9] | 
 | 	lea	rsp,[r9] | 
 |  | 
 | $L$4x_epilogue: | 
 | 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue | 
 | 	mov	rsi,QWORD[16+rsp] | 
 | 	ret | 
 |  | 
 | $L$SEH_end_ChaCha20_ctr32_ssse3_4x: | 
 | global	ChaCha20_ctr32_avx2 | 
 |  | 
 | ALIGN	32 | 
 | ChaCha20_ctr32_avx2: | 
 | 	mov	QWORD[8+rsp],rdi	;WIN64 prologue | 
 | 	mov	QWORD[16+rsp],rsi | 
 | 	mov	rax,rsp | 
 | $L$SEH_begin_ChaCha20_ctr32_avx2: | 
 | 	mov	rdi,rcx | 
 | 	mov	rsi,rdx | 
 | 	mov	rdx,r8 | 
 | 	mov	rcx,r9 | 
 | 	mov	r8,QWORD[40+rsp] | 
 |  | 
 |  | 
 |  | 
 | _CET_ENDBR | 
 | 	mov	r9,rsp | 
 |  | 
 | 	sub	rsp,0x280+168 | 
 | 	and	rsp,-32 | 
 | 	movaps	XMMWORD[(-168)+r9],xmm6 | 
 | 	movaps	XMMWORD[(-152)+r9],xmm7 | 
 | 	movaps	XMMWORD[(-136)+r9],xmm8 | 
 | 	movaps	XMMWORD[(-120)+r9],xmm9 | 
 | 	movaps	XMMWORD[(-104)+r9],xmm10 | 
 | 	movaps	XMMWORD[(-88)+r9],xmm11 | 
 | 	movaps	XMMWORD[(-72)+r9],xmm12 | 
 | 	movaps	XMMWORD[(-56)+r9],xmm13 | 
 | 	movaps	XMMWORD[(-40)+r9],xmm14 | 
 | 	movaps	XMMWORD[(-24)+r9],xmm15 | 
 | $L$8x_body: | 
 | 	vzeroupper | 
 |  | 
 |  | 
 |  | 
 |  | 
 |  | 
 |  | 
 |  | 
 |  | 
 |  | 
 |  | 
 | 	vbroadcasti128	ymm11,XMMWORD[$L$sigma] | 
 | 	vbroadcasti128	ymm3,XMMWORD[rcx] | 
 | 	vbroadcasti128	ymm15,XMMWORD[16+rcx] | 
 | 	vbroadcasti128	ymm7,XMMWORD[r8] | 
 | 	lea	rcx,[256+rsp] | 
 | 	lea	rax,[512+rsp] | 
 | 	lea	r10,[$L$rot16] | 
 | 	lea	r11,[$L$rot24] | 
 |  | 
 | 	vpshufd	ymm8,ymm11,0x00 | 
 | 	vpshufd	ymm9,ymm11,0x55 | 
 | 	vmovdqa	YMMWORD[(128-256)+rcx],ymm8 | 
 | 	vpshufd	ymm10,ymm11,0xaa | 
 | 	vmovdqa	YMMWORD[(160-256)+rcx],ymm9 | 
 | 	vpshufd	ymm11,ymm11,0xff | 
 | 	vmovdqa	YMMWORD[(192-256)+rcx],ymm10 | 
 | 	vmovdqa	YMMWORD[(224-256)+rcx],ymm11 | 
 |  | 
 | 	vpshufd	ymm0,ymm3,0x00 | 
 | 	vpshufd	ymm1,ymm3,0x55 | 
 | 	vmovdqa	YMMWORD[(256-256)+rcx],ymm0 | 
 | 	vpshufd	ymm2,ymm3,0xaa | 
 | 	vmovdqa	YMMWORD[(288-256)+rcx],ymm1 | 
 | 	vpshufd	ymm3,ymm3,0xff | 
 | 	vmovdqa	YMMWORD[(320-256)+rcx],ymm2 | 
 | 	vmovdqa	YMMWORD[(352-256)+rcx],ymm3 | 
 |  | 
 | 	vpshufd	ymm12,ymm15,0x00 | 
 | 	vpshufd	ymm13,ymm15,0x55 | 
 | 	vmovdqa	YMMWORD[(384-512)+rax],ymm12 | 
 | 	vpshufd	ymm14,ymm15,0xaa | 
 | 	vmovdqa	YMMWORD[(416-512)+rax],ymm13 | 
 | 	vpshufd	ymm15,ymm15,0xff | 
 | 	vmovdqa	YMMWORD[(448-512)+rax],ymm14 | 
 | 	vmovdqa	YMMWORD[(480-512)+rax],ymm15 | 
 |  | 
 | 	vpshufd	ymm4,ymm7,0x00 | 
 | 	vpshufd	ymm5,ymm7,0x55 | 
 | 	vpaddd	ymm4,ymm4,YMMWORD[$L$incy] | 
 | 	vpshufd	ymm6,ymm7,0xaa | 
 | 	vmovdqa	YMMWORD[(544-512)+rax],ymm5 | 
 | 	vpshufd	ymm7,ymm7,0xff | 
 | 	vmovdqa	YMMWORD[(576-512)+rax],ymm6 | 
 | 	vmovdqa	YMMWORD[(608-512)+rax],ymm7 | 
 |  | 
 | 	jmp	NEAR $L$oop_enter8x | 
 |  | 
 | ALIGN	32 | 
 | $L$oop_outer8x: | 
 | 	vmovdqa	ymm8,YMMWORD[((128-256))+rcx] | 
 | 	vmovdqa	ymm9,YMMWORD[((160-256))+rcx] | 
 | 	vmovdqa	ymm10,YMMWORD[((192-256))+rcx] | 
 | 	vmovdqa	ymm11,YMMWORD[((224-256))+rcx] | 
 | 	vmovdqa	ymm0,YMMWORD[((256-256))+rcx] | 
 | 	vmovdqa	ymm1,YMMWORD[((288-256))+rcx] | 
 | 	vmovdqa	ymm2,YMMWORD[((320-256))+rcx] | 
 | 	vmovdqa	ymm3,YMMWORD[((352-256))+rcx] | 
 | 	vmovdqa	ymm12,YMMWORD[((384-512))+rax] | 
 | 	vmovdqa	ymm13,YMMWORD[((416-512))+rax] | 
 | 	vmovdqa	ymm14,YMMWORD[((448-512))+rax] | 
 | 	vmovdqa	ymm15,YMMWORD[((480-512))+rax] | 
 | 	vmovdqa	ymm4,YMMWORD[((512-512))+rax] | 
 | 	vmovdqa	ymm5,YMMWORD[((544-512))+rax] | 
 | 	vmovdqa	ymm6,YMMWORD[((576-512))+rax] | 
 | 	vmovdqa	ymm7,YMMWORD[((608-512))+rax] | 
 | 	vpaddd	ymm4,ymm4,YMMWORD[$L$eight] | 
 |  | 
 | $L$oop_enter8x: | 
 | 	vmovdqa	YMMWORD[64+rsp],ymm14 | 
 | 	vmovdqa	YMMWORD[96+rsp],ymm15 | 
 | 	vbroadcasti128	ymm15,XMMWORD[r10] | 
 | 	vmovdqa	YMMWORD[(512-512)+rax],ymm4 | 
 | 	mov	eax,10 | 
 | 	jmp	NEAR $L$oop8x | 
 |  | 
 | ALIGN	32 | 
 | $L$oop8x: | 
 | 	vpaddd	ymm8,ymm8,ymm0 | 
 | 	vpxor	ymm4,ymm8,ymm4 | 
 | 	vpshufb	ymm4,ymm4,ymm15 | 
 | 	vpaddd	ymm9,ymm9,ymm1 | 
 | 	vpxor	ymm5,ymm9,ymm5 | 
 | 	vpshufb	ymm5,ymm5,ymm15 | 
 | 	vpaddd	ymm12,ymm12,ymm4 | 
 | 	vpxor	ymm0,ymm12,ymm0 | 
 | 	vpslld	ymm14,ymm0,12 | 
 | 	vpsrld	ymm0,ymm0,20 | 
 | 	vpor	ymm0,ymm14,ymm0 | 
 | 	vbroadcasti128	ymm14,XMMWORD[r11] | 
 | 	vpaddd	ymm13,ymm13,ymm5 | 
 | 	vpxor	ymm1,ymm13,ymm1 | 
 | 	vpslld	ymm15,ymm1,12 | 
 | 	vpsrld	ymm1,ymm1,20 | 
 | 	vpor	ymm1,ymm15,ymm1 | 
 | 	vpaddd	ymm8,ymm8,ymm0 | 
 | 	vpxor	ymm4,ymm8,ymm4 | 
 | 	vpshufb	ymm4,ymm4,ymm14 | 
 | 	vpaddd	ymm9,ymm9,ymm1 | 
 | 	vpxor	ymm5,ymm9,ymm5 | 
 | 	vpshufb	ymm5,ymm5,ymm14 | 
 | 	vpaddd	ymm12,ymm12,ymm4 | 
 | 	vpxor	ymm0,ymm12,ymm0 | 
 | 	vpslld	ymm15,ymm0,7 | 
 | 	vpsrld	ymm0,ymm0,25 | 
 | 	vpor	ymm0,ymm15,ymm0 | 
 | 	vbroadcasti128	ymm15,XMMWORD[r10] | 
 | 	vpaddd	ymm13,ymm13,ymm5 | 
 | 	vpxor	ymm1,ymm13,ymm1 | 
 | 	vpslld	ymm14,ymm1,7 | 
 | 	vpsrld	ymm1,ymm1,25 | 
 | 	vpor	ymm1,ymm14,ymm1 | 
 | 	vmovdqa	YMMWORD[rsp],ymm12 | 
 | 	vmovdqa	YMMWORD[32+rsp],ymm13 | 
 | 	vmovdqa	ymm12,YMMWORD[64+rsp] | 
 | 	vmovdqa	ymm13,YMMWORD[96+rsp] | 
 | 	vpaddd	ymm10,ymm10,ymm2 | 
 | 	vpxor	ymm6,ymm10,ymm6 | 
 | 	vpshufb	ymm6,ymm6,ymm15 | 
 | 	vpaddd	ymm11,ymm11,ymm3 | 
 | 	vpxor	ymm7,ymm11,ymm7 | 
 | 	vpshufb	ymm7,ymm7,ymm15 | 
 | 	vpaddd	ymm12,ymm12,ymm6 | 
 | 	vpxor	ymm2,ymm12,ymm2 | 
 | 	vpslld	ymm14,ymm2,12 | 
 | 	vpsrld	ymm2,ymm2,20 | 
 | 	vpor	ymm2,ymm14,ymm2 | 
 | 	vbroadcasti128	ymm14,XMMWORD[r11] | 
 | 	vpaddd	ymm13,ymm13,ymm7 | 
 | 	vpxor	ymm3,ymm13,ymm3 | 
 | 	vpslld	ymm15,ymm3,12 | 
 | 	vpsrld	ymm3,ymm3,20 | 
 | 	vpor	ymm3,ymm15,ymm3 | 
 | 	vpaddd	ymm10,ymm10,ymm2 | 
 | 	vpxor	ymm6,ymm10,ymm6 | 
 | 	vpshufb	ymm6,ymm6,ymm14 | 
 | 	vpaddd	ymm11,ymm11,ymm3 | 
 | 	vpxor	ymm7,ymm11,ymm7 | 
 | 	vpshufb	ymm7,ymm7,ymm14 | 
 | 	vpaddd	ymm12,ymm12,ymm6 | 
 | 	vpxor	ymm2,ymm12,ymm2 | 
 | 	vpslld	ymm15,ymm2,7 | 
 | 	vpsrld	ymm2,ymm2,25 | 
 | 	vpor	ymm2,ymm15,ymm2 | 
 | 	vbroadcasti128	ymm15,XMMWORD[r10] | 
 | 	vpaddd	ymm13,ymm13,ymm7 | 
 | 	vpxor	ymm3,ymm13,ymm3 | 
 | 	vpslld	ymm14,ymm3,7 | 
 | 	vpsrld	ymm3,ymm3,25 | 
 | 	vpor	ymm3,ymm14,ymm3 | 
 | 	vpaddd	ymm8,ymm8,ymm1 | 
 | 	vpxor	ymm7,ymm8,ymm7 | 
 | 	vpshufb	ymm7,ymm7,ymm15 | 
 | 	vpaddd	ymm9,ymm9,ymm2 | 
 | 	vpxor	ymm4,ymm9,ymm4 | 
 | 	vpshufb	ymm4,ymm4,ymm15 | 
 | 	vpaddd	ymm12,ymm12,ymm7 | 
 | 	vpxor	ymm1,ymm12,ymm1 | 
 | 	vpslld	ymm14,ymm1,12 | 
 | 	vpsrld	ymm1,ymm1,20 | 
 | 	vpor	ymm1,ymm14,ymm1 | 
 | 	vbroadcasti128	ymm14,XMMWORD[r11] | 
 | 	vpaddd	ymm13,ymm13,ymm4 | 
 | 	vpxor	ymm2,ymm13,ymm2 | 
 | 	vpslld	ymm15,ymm2,12 | 
 | 	vpsrld	ymm2,ymm2,20 | 
 | 	vpor	ymm2,ymm15,ymm2 | 
 | 	vpaddd	ymm8,ymm8,ymm1 | 
 | 	vpxor	ymm7,ymm8,ymm7 | 
 | 	vpshufb	ymm7,ymm7,ymm14 | 
 | 	vpaddd	ymm9,ymm9,ymm2 | 
 | 	vpxor	ymm4,ymm9,ymm4 | 
 | 	vpshufb	ymm4,ymm4,ymm14 | 
 | 	vpaddd	ymm12,ymm12,ymm7 | 
 | 	vpxor	ymm1,ymm12,ymm1 | 
 | 	vpslld	ymm15,ymm1,7 | 
 | 	vpsrld	ymm1,ymm1,25 | 
 | 	vpor	ymm1,ymm15,ymm1 | 
 | 	vbroadcasti128	ymm15,XMMWORD[r10] | 
 | 	vpaddd	ymm13,ymm13,ymm4 | 
 | 	vpxor	ymm2,ymm13,ymm2 | 
 | 	vpslld	ymm14,ymm2,7 | 
 | 	vpsrld	ymm2,ymm2,25 | 
 | 	vpor	ymm2,ymm14,ymm2 | 
 | 	vmovdqa	YMMWORD[64+rsp],ymm12 | 
 | 	vmovdqa	YMMWORD[96+rsp],ymm13 | 
 | 	vmovdqa	ymm12,YMMWORD[rsp] | 
 | 	vmovdqa	ymm13,YMMWORD[32+rsp] | 
 | 	vpaddd	ymm10,ymm10,ymm3 | 
 | 	vpxor	ymm5,ymm10,ymm5 | 
 | 	vpshufb	ymm5,ymm5,ymm15 | 
 | 	vpaddd	ymm11,ymm11,ymm0 | 
 | 	vpxor	ymm6,ymm11,ymm6 | 
 | 	vpshufb	ymm6,ymm6,ymm15 | 
 | 	vpaddd	ymm12,ymm12,ymm5 | 
 | 	vpxor	ymm3,ymm12,ymm3 | 
 | 	vpslld	ymm14,ymm3,12 | 
 | 	vpsrld	ymm3,ymm3,20 | 
 | 	vpor	ymm3,ymm14,ymm3 | 
 | 	vbroadcasti128	ymm14,XMMWORD[r11] | 
 | 	vpaddd	ymm13,ymm13,ymm6 | 
 | 	vpxor	ymm0,ymm13,ymm0 | 
 | 	vpslld	ymm15,ymm0,12 | 
 | 	vpsrld	ymm0,ymm0,20 | 
 | 	vpor	ymm0,ymm15,ymm0 | 
 | 	vpaddd	ymm10,ymm10,ymm3 | 
 | 	vpxor	ymm5,ymm10,ymm5 | 
 | 	vpshufb	ymm5,ymm5,ymm14 | 
 | 	vpaddd	ymm11,ymm11,ymm0 | 
 | 	vpxor	ymm6,ymm11,ymm6 | 
 | 	vpshufb	ymm6,ymm6,ymm14 | 
 | 	vpaddd	ymm12,ymm12,ymm5 | 
 | 	vpxor	ymm3,ymm12,ymm3 | 
 | 	vpslld	ymm15,ymm3,7 | 
 | 	vpsrld	ymm3,ymm3,25 | 
 | 	vpor	ymm3,ymm15,ymm3 | 
 | 	vbroadcasti128	ymm15,XMMWORD[r10] | 
 | 	vpaddd	ymm13,ymm13,ymm6 | 
 | 	vpxor	ymm0,ymm13,ymm0 | 
 | 	vpslld	ymm14,ymm0,7 | 
 | 	vpsrld	ymm0,ymm0,25 | 
 | 	vpor	ymm0,ymm14,ymm0 | 
 | 	dec	eax | 
 | 	jnz	NEAR $L$oop8x | 
 |  | 
 | 	lea	rax,[512+rsp] | 
 | 	vpaddd	ymm8,ymm8,YMMWORD[((128-256))+rcx] | 
 | 	vpaddd	ymm9,ymm9,YMMWORD[((160-256))+rcx] | 
 | 	vpaddd	ymm10,ymm10,YMMWORD[((192-256))+rcx] | 
 | 	vpaddd	ymm11,ymm11,YMMWORD[((224-256))+rcx] | 
 |  | 
 | 	vpunpckldq	ymm14,ymm8,ymm9 | 
 | 	vpunpckldq	ymm15,ymm10,ymm11 | 
 | 	vpunpckhdq	ymm8,ymm8,ymm9 | 
 | 	vpunpckhdq	ymm10,ymm10,ymm11 | 
 | 	vpunpcklqdq	ymm9,ymm14,ymm15 | 
 | 	vpunpckhqdq	ymm14,ymm14,ymm15 | 
 | 	vpunpcklqdq	ymm11,ymm8,ymm10 | 
 | 	vpunpckhqdq	ymm8,ymm8,ymm10 | 
 | 	vpaddd	ymm0,ymm0,YMMWORD[((256-256))+rcx] | 
 | 	vpaddd	ymm1,ymm1,YMMWORD[((288-256))+rcx] | 
 | 	vpaddd	ymm2,ymm2,YMMWORD[((320-256))+rcx] | 
 | 	vpaddd	ymm3,ymm3,YMMWORD[((352-256))+rcx] | 
 |  | 
 | 	vpunpckldq	ymm10,ymm0,ymm1 | 
 | 	vpunpckldq	ymm15,ymm2,ymm3 | 
 | 	vpunpckhdq	ymm0,ymm0,ymm1 | 
 | 	vpunpckhdq	ymm2,ymm2,ymm3 | 
 | 	vpunpcklqdq	ymm1,ymm10,ymm15 | 
 | 	vpunpckhqdq	ymm10,ymm10,ymm15 | 
 | 	vpunpcklqdq	ymm3,ymm0,ymm2 | 
 | 	vpunpckhqdq	ymm0,ymm0,ymm2 | 
 | 	vperm2i128	ymm15,ymm9,ymm1,0x20 | 
 | 	vperm2i128	ymm1,ymm9,ymm1,0x31 | 
 | 	vperm2i128	ymm9,ymm14,ymm10,0x20 | 
 | 	vperm2i128	ymm10,ymm14,ymm10,0x31 | 
 | 	vperm2i128	ymm14,ymm11,ymm3,0x20 | 
 | 	vperm2i128	ymm3,ymm11,ymm3,0x31 | 
 | 	vperm2i128	ymm11,ymm8,ymm0,0x20 | 
 | 	vperm2i128	ymm0,ymm8,ymm0,0x31 | 
 | 	vmovdqa	YMMWORD[rsp],ymm15 | 
 | 	vmovdqa	YMMWORD[32+rsp],ymm9 | 
 | 	vmovdqa	ymm15,YMMWORD[64+rsp] | 
 | 	vmovdqa	ymm9,YMMWORD[96+rsp] | 
 |  | 
 | 	vpaddd	ymm12,ymm12,YMMWORD[((384-512))+rax] | 
 | 	vpaddd	ymm13,ymm13,YMMWORD[((416-512))+rax] | 
 | 	vpaddd	ymm15,ymm15,YMMWORD[((448-512))+rax] | 
 | 	vpaddd	ymm9,ymm9,YMMWORD[((480-512))+rax] | 
 |  | 
 | 	vpunpckldq	ymm2,ymm12,ymm13 | 
 | 	vpunpckldq	ymm8,ymm15,ymm9 | 
 | 	vpunpckhdq	ymm12,ymm12,ymm13 | 
 | 	vpunpckhdq	ymm15,ymm15,ymm9 | 
 | 	vpunpcklqdq	ymm13,ymm2,ymm8 | 
 | 	vpunpckhqdq	ymm2,ymm2,ymm8 | 
 | 	vpunpcklqdq	ymm9,ymm12,ymm15 | 
 | 	vpunpckhqdq	ymm12,ymm12,ymm15 | 
 | 	vpaddd	ymm4,ymm4,YMMWORD[((512-512))+rax] | 
 | 	vpaddd	ymm5,ymm5,YMMWORD[((544-512))+rax] | 
 | 	vpaddd	ymm6,ymm6,YMMWORD[((576-512))+rax] | 
 | 	vpaddd	ymm7,ymm7,YMMWORD[((608-512))+rax] | 
 |  | 
 | 	vpunpckldq	ymm15,ymm4,ymm5 | 
 | 	vpunpckldq	ymm8,ymm6,ymm7 | 
 | 	vpunpckhdq	ymm4,ymm4,ymm5 | 
 | 	vpunpckhdq	ymm6,ymm6,ymm7 | 
 | 	vpunpcklqdq	ymm5,ymm15,ymm8 | 
 | 	vpunpckhqdq	ymm15,ymm15,ymm8 | 
 | 	vpunpcklqdq	ymm7,ymm4,ymm6 | 
 | 	vpunpckhqdq	ymm4,ymm4,ymm6 | 
 | 	vperm2i128	ymm8,ymm13,ymm5,0x20 | 
 | 	vperm2i128	ymm5,ymm13,ymm5,0x31 | 
 | 	vperm2i128	ymm13,ymm2,ymm15,0x20 | 
 | 	vperm2i128	ymm15,ymm2,ymm15,0x31 | 
 | 	vperm2i128	ymm2,ymm9,ymm7,0x20 | 
 | 	vperm2i128	ymm7,ymm9,ymm7,0x31 | 
 | 	vperm2i128	ymm9,ymm12,ymm4,0x20 | 
 | 	vperm2i128	ymm4,ymm12,ymm4,0x31 | 
 | 	vmovdqa	ymm6,YMMWORD[rsp] | 
 | 	vmovdqa	ymm12,YMMWORD[32+rsp] | 
 |  | 
 | 	cmp	rdx,64*8 | 
 | 	jb	NEAR $L$tail8x | 
 |  | 
 | 	vpxor	ymm6,ymm6,YMMWORD[rsi] | 
 | 	vpxor	ymm8,ymm8,YMMWORD[32+rsi] | 
 | 	vpxor	ymm1,ymm1,YMMWORD[64+rsi] | 
 | 	vpxor	ymm5,ymm5,YMMWORD[96+rsi] | 
 | 	lea	rsi,[128+rsi] | 
 | 	vmovdqu	YMMWORD[rdi],ymm6 | 
 | 	vmovdqu	YMMWORD[32+rdi],ymm8 | 
 | 	vmovdqu	YMMWORD[64+rdi],ymm1 | 
 | 	vmovdqu	YMMWORD[96+rdi],ymm5 | 
 | 	lea	rdi,[128+rdi] | 
 |  | 
 | 	vpxor	ymm12,ymm12,YMMWORD[rsi] | 
 | 	vpxor	ymm13,ymm13,YMMWORD[32+rsi] | 
 | 	vpxor	ymm10,ymm10,YMMWORD[64+rsi] | 
 | 	vpxor	ymm15,ymm15,YMMWORD[96+rsi] | 
 | 	lea	rsi,[128+rsi] | 
 | 	vmovdqu	YMMWORD[rdi],ymm12 | 
 | 	vmovdqu	YMMWORD[32+rdi],ymm13 | 
 | 	vmovdqu	YMMWORD[64+rdi],ymm10 | 
 | 	vmovdqu	YMMWORD[96+rdi],ymm15 | 
 | 	lea	rdi,[128+rdi] | 
 |  | 
 | 	vpxor	ymm14,ymm14,YMMWORD[rsi] | 
 | 	vpxor	ymm2,ymm2,YMMWORD[32+rsi] | 
 | 	vpxor	ymm3,ymm3,YMMWORD[64+rsi] | 
 | 	vpxor	ymm7,ymm7,YMMWORD[96+rsi] | 
 | 	lea	rsi,[128+rsi] | 
 | 	vmovdqu	YMMWORD[rdi],ymm14 | 
 | 	vmovdqu	YMMWORD[32+rdi],ymm2 | 
 | 	vmovdqu	YMMWORD[64+rdi],ymm3 | 
 | 	vmovdqu	YMMWORD[96+rdi],ymm7 | 
 | 	lea	rdi,[128+rdi] | 
 |  | 
 | 	vpxor	ymm11,ymm11,YMMWORD[rsi] | 
 | 	vpxor	ymm9,ymm9,YMMWORD[32+rsi] | 
 | 	vpxor	ymm0,ymm0,YMMWORD[64+rsi] | 
 | 	vpxor	ymm4,ymm4,YMMWORD[96+rsi] | 
 | 	lea	rsi,[128+rsi] | 
 | 	vmovdqu	YMMWORD[rdi],ymm11 | 
 | 	vmovdqu	YMMWORD[32+rdi],ymm9 | 
 | 	vmovdqu	YMMWORD[64+rdi],ymm0 | 
 | 	vmovdqu	YMMWORD[96+rdi],ymm4 | 
 | 	lea	rdi,[128+rdi] | 
 |  | 
 | 	sub	rdx,64*8 | 
 | 	jnz	NEAR $L$oop_outer8x | 
 |  | 
 | 	jmp	NEAR $L$done8x | 
 |  | 
 | $L$tail8x: | 
 | 	cmp	rdx,448 | 
 | 	jae	NEAR $L$448_or_more8x | 
 | 	cmp	rdx,384 | 
 | 	jae	NEAR $L$384_or_more8x | 
 | 	cmp	rdx,320 | 
 | 	jae	NEAR $L$320_or_more8x | 
 | 	cmp	rdx,256 | 
 | 	jae	NEAR $L$256_or_more8x | 
 | 	cmp	rdx,192 | 
 | 	jae	NEAR $L$192_or_more8x | 
 | 	cmp	rdx,128 | 
 | 	jae	NEAR $L$128_or_more8x | 
 | 	cmp	rdx,64 | 
 | 	jae	NEAR $L$64_or_more8x | 
 |  | 
 | 	xor	r10,r10 | 
 | 	vmovdqa	YMMWORD[rsp],ymm6 | 
 | 	vmovdqa	YMMWORD[32+rsp],ymm8 | 
 | 	jmp	NEAR $L$oop_tail8x | 
 |  | 
 | ALIGN	32 | 
 | $L$64_or_more8x: | 
 | 	vpxor	ymm6,ymm6,YMMWORD[rsi] | 
 | 	vpxor	ymm8,ymm8,YMMWORD[32+rsi] | 
 | 	vmovdqu	YMMWORD[rdi],ymm6 | 
 | 	vmovdqu	YMMWORD[32+rdi],ymm8 | 
 | 	je	NEAR $L$done8x | 
 |  | 
 | 	lea	rsi,[64+rsi] | 
 | 	xor	r10,r10 | 
 | 	vmovdqa	YMMWORD[rsp],ymm1 | 
 | 	lea	rdi,[64+rdi] | 
 | 	sub	rdx,64 | 
 | 	vmovdqa	YMMWORD[32+rsp],ymm5 | 
 | 	jmp	NEAR $L$oop_tail8x | 
 |  | 
 | ALIGN	32 | 
 | $L$128_or_more8x: | 
 | 	vpxor	ymm6,ymm6,YMMWORD[rsi] | 
 | 	vpxor	ymm8,ymm8,YMMWORD[32+rsi] | 
 | 	vpxor	ymm1,ymm1,YMMWORD[64+rsi] | 
 | 	vpxor	ymm5,ymm5,YMMWORD[96+rsi] | 
 | 	vmovdqu	YMMWORD[rdi],ymm6 | 
 | 	vmovdqu	YMMWORD[32+rdi],ymm8 | 
 | 	vmovdqu	YMMWORD[64+rdi],ymm1 | 
 | 	vmovdqu	YMMWORD[96+rdi],ymm5 | 
 | 	je	NEAR $L$done8x | 
 |  | 
 | 	lea	rsi,[128+rsi] | 
 | 	xor	r10,r10 | 
 | 	vmovdqa	YMMWORD[rsp],ymm12 | 
 | 	lea	rdi,[128+rdi] | 
 | 	sub	rdx,128 | 
 | 	vmovdqa	YMMWORD[32+rsp],ymm13 | 
 | 	jmp	NEAR $L$oop_tail8x | 
 |  | 
 | ALIGN	32 | 
 | $L$192_or_more8x: | 
 | 	vpxor	ymm6,ymm6,YMMWORD[rsi] | 
 | 	vpxor	ymm8,ymm8,YMMWORD[32+rsi] | 
 | 	vpxor	ymm1,ymm1,YMMWORD[64+rsi] | 
 | 	vpxor	ymm5,ymm5,YMMWORD[96+rsi] | 
 | 	vpxor	ymm12,ymm12,YMMWORD[128+rsi] | 
 | 	vpxor	ymm13,ymm13,YMMWORD[160+rsi] | 
 | 	vmovdqu	YMMWORD[rdi],ymm6 | 
 | 	vmovdqu	YMMWORD[32+rdi],ymm8 | 
 | 	vmovdqu	YMMWORD[64+rdi],ymm1 | 
 | 	vmovdqu	YMMWORD[96+rdi],ymm5 | 
 | 	vmovdqu	YMMWORD[128+rdi],ymm12 | 
 | 	vmovdqu	YMMWORD[160+rdi],ymm13 | 
 | 	je	NEAR $L$done8x | 
 |  | 
 | 	lea	rsi,[192+rsi] | 
 | 	xor	r10,r10 | 
 | 	vmovdqa	YMMWORD[rsp],ymm10 | 
 | 	lea	rdi,[192+rdi] | 
 | 	sub	rdx,192 | 
 | 	vmovdqa	YMMWORD[32+rsp],ymm15 | 
 | 	jmp	NEAR $L$oop_tail8x | 
 |  | 
 | ALIGN	32 | 
 | $L$256_or_more8x: | 
 | 	vpxor	ymm6,ymm6,YMMWORD[rsi] | 
 | 	vpxor	ymm8,ymm8,YMMWORD[32+rsi] | 
 | 	vpxor	ymm1,ymm1,YMMWORD[64+rsi] | 
 | 	vpxor	ymm5,ymm5,YMMWORD[96+rsi] | 
 | 	vpxor	ymm12,ymm12,YMMWORD[128+rsi] | 
 | 	vpxor	ymm13,ymm13,YMMWORD[160+rsi] | 
 | 	vpxor	ymm10,ymm10,YMMWORD[192+rsi] | 
 | 	vpxor	ymm15,ymm15,YMMWORD[224+rsi] | 
 | 	vmovdqu	YMMWORD[rdi],ymm6 | 
 | 	vmovdqu	YMMWORD[32+rdi],ymm8 | 
 | 	vmovdqu	YMMWORD[64+rdi],ymm1 | 
 | 	vmovdqu	YMMWORD[96+rdi],ymm5 | 
 | 	vmovdqu	YMMWORD[128+rdi],ymm12 | 
 | 	vmovdqu	YMMWORD[160+rdi],ymm13 | 
 | 	vmovdqu	YMMWORD[192+rdi],ymm10 | 
 | 	vmovdqu	YMMWORD[224+rdi],ymm15 | 
 | 	je	NEAR $L$done8x | 
 |  | 
 | 	lea	rsi,[256+rsi] | 
 | 	xor	r10,r10 | 
 | 	vmovdqa	YMMWORD[rsp],ymm14 | 
 | 	lea	rdi,[256+rdi] | 
 | 	sub	rdx,256 | 
 | 	vmovdqa	YMMWORD[32+rsp],ymm2 | 
 | 	jmp	NEAR $L$oop_tail8x | 
 |  | 
 | ALIGN	32 | 
 | $L$320_or_more8x: | 
 | 	vpxor	ymm6,ymm6,YMMWORD[rsi] | 
 | 	vpxor	ymm8,ymm8,YMMWORD[32+rsi] | 
 | 	vpxor	ymm1,ymm1,YMMWORD[64+rsi] | 
 | 	vpxor	ymm5,ymm5,YMMWORD[96+rsi] | 
 | 	vpxor	ymm12,ymm12,YMMWORD[128+rsi] | 
 | 	vpxor	ymm13,ymm13,YMMWORD[160+rsi] | 
 | 	vpxor	ymm10,ymm10,YMMWORD[192+rsi] | 
 | 	vpxor	ymm15,ymm15,YMMWORD[224+rsi] | 
 | 	vpxor	ymm14,ymm14,YMMWORD[256+rsi] | 
 | 	vpxor	ymm2,ymm2,YMMWORD[288+rsi] | 
 | 	vmovdqu	YMMWORD[rdi],ymm6 | 
 | 	vmovdqu	YMMWORD[32+rdi],ymm8 | 
 | 	vmovdqu	YMMWORD[64+rdi],ymm1 | 
 | 	vmovdqu	YMMWORD[96+rdi],ymm5 | 
 | 	vmovdqu	YMMWORD[128+rdi],ymm12 | 
 | 	vmovdqu	YMMWORD[160+rdi],ymm13 | 
 | 	vmovdqu	YMMWORD[192+rdi],ymm10 | 
 | 	vmovdqu	YMMWORD[224+rdi],ymm15 | 
 | 	vmovdqu	YMMWORD[256+rdi],ymm14 | 
 | 	vmovdqu	YMMWORD[288+rdi],ymm2 | 
 | 	je	NEAR $L$done8x | 
 |  | 
 | 	lea	rsi,[320+rsi] | 
 | 	xor	r10,r10 | 
 | 	vmovdqa	YMMWORD[rsp],ymm3 | 
 | 	lea	rdi,[320+rdi] | 
 | 	sub	rdx,320 | 
 | 	vmovdqa	YMMWORD[32+rsp],ymm7 | 
 | 	jmp	NEAR $L$oop_tail8x | 
 |  | 
 | ALIGN	32 | 
 | $L$384_or_more8x: | 
 | 	vpxor	ymm6,ymm6,YMMWORD[rsi] | 
 | 	vpxor	ymm8,ymm8,YMMWORD[32+rsi] | 
 | 	vpxor	ymm1,ymm1,YMMWORD[64+rsi] | 
 | 	vpxor	ymm5,ymm5,YMMWORD[96+rsi] | 
 | 	vpxor	ymm12,ymm12,YMMWORD[128+rsi] | 
 | 	vpxor	ymm13,ymm13,YMMWORD[160+rsi] | 
 | 	vpxor	ymm10,ymm10,YMMWORD[192+rsi] | 
 | 	vpxor	ymm15,ymm15,YMMWORD[224+rsi] | 
 | 	vpxor	ymm14,ymm14,YMMWORD[256+rsi] | 
 | 	vpxor	ymm2,ymm2,YMMWORD[288+rsi] | 
 | 	vpxor	ymm3,ymm3,YMMWORD[320+rsi] | 
 | 	vpxor	ymm7,ymm7,YMMWORD[352+rsi] | 
 | 	vmovdqu	YMMWORD[rdi],ymm6 | 
 | 	vmovdqu	YMMWORD[32+rdi],ymm8 | 
 | 	vmovdqu	YMMWORD[64+rdi],ymm1 | 
 | 	vmovdqu	YMMWORD[96+rdi],ymm5 | 
 | 	vmovdqu	YMMWORD[128+rdi],ymm12 | 
 | 	vmovdqu	YMMWORD[160+rdi],ymm13 | 
 | 	vmovdqu	YMMWORD[192+rdi],ymm10 | 
 | 	vmovdqu	YMMWORD[224+rdi],ymm15 | 
 | 	vmovdqu	YMMWORD[256+rdi],ymm14 | 
 | 	vmovdqu	YMMWORD[288+rdi],ymm2 | 
 | 	vmovdqu	YMMWORD[320+rdi],ymm3 | 
 | 	vmovdqu	YMMWORD[352+rdi],ymm7 | 
 | 	je	NEAR $L$done8x | 
 |  | 
 | 	lea	rsi,[384+rsi] | 
 | 	xor	r10,r10 | 
 | 	vmovdqa	YMMWORD[rsp],ymm11 | 
 | 	lea	rdi,[384+rdi] | 
 | 	sub	rdx,384 | 
 | 	vmovdqa	YMMWORD[32+rsp],ymm9 | 
 | 	jmp	NEAR $L$oop_tail8x | 
 |  | 
 | ALIGN	32 | 
 | $L$448_or_more8x: | 
 | 	vpxor	ymm6,ymm6,YMMWORD[rsi] | 
 | 	vpxor	ymm8,ymm8,YMMWORD[32+rsi] | 
 | 	vpxor	ymm1,ymm1,YMMWORD[64+rsi] | 
 | 	vpxor	ymm5,ymm5,YMMWORD[96+rsi] | 
 | 	vpxor	ymm12,ymm12,YMMWORD[128+rsi] | 
 | 	vpxor	ymm13,ymm13,YMMWORD[160+rsi] | 
 | 	vpxor	ymm10,ymm10,YMMWORD[192+rsi] | 
 | 	vpxor	ymm15,ymm15,YMMWORD[224+rsi] | 
 | 	vpxor	ymm14,ymm14,YMMWORD[256+rsi] | 
 | 	vpxor	ymm2,ymm2,YMMWORD[288+rsi] | 
 | 	vpxor	ymm3,ymm3,YMMWORD[320+rsi] | 
 | 	vpxor	ymm7,ymm7,YMMWORD[352+rsi] | 
 | 	vpxor	ymm11,ymm11,YMMWORD[384+rsi] | 
 | 	vpxor	ymm9,ymm9,YMMWORD[416+rsi] | 
 | 	vmovdqu	YMMWORD[rdi],ymm6 | 
 | 	vmovdqu	YMMWORD[32+rdi],ymm8 | 
 | 	vmovdqu	YMMWORD[64+rdi],ymm1 | 
 | 	vmovdqu	YMMWORD[96+rdi],ymm5 | 
 | 	vmovdqu	YMMWORD[128+rdi],ymm12 | 
 | 	vmovdqu	YMMWORD[160+rdi],ymm13 | 
 | 	vmovdqu	YMMWORD[192+rdi],ymm10 | 
 | 	vmovdqu	YMMWORD[224+rdi],ymm15 | 
 | 	vmovdqu	YMMWORD[256+rdi],ymm14 | 
 | 	vmovdqu	YMMWORD[288+rdi],ymm2 | 
 | 	vmovdqu	YMMWORD[320+rdi],ymm3 | 
 | 	vmovdqu	YMMWORD[352+rdi],ymm7 | 
 | 	vmovdqu	YMMWORD[384+rdi],ymm11 | 
 | 	vmovdqu	YMMWORD[416+rdi],ymm9 | 
 | 	je	NEAR $L$done8x | 
 |  | 
 | 	lea	rsi,[448+rsi] | 
 | 	xor	r10,r10 | 
 | 	vmovdqa	YMMWORD[rsp],ymm0 | 
 | 	lea	rdi,[448+rdi] | 
 | 	sub	rdx,448 | 
 | 	vmovdqa	YMMWORD[32+rsp],ymm4 | 
 |  | 
 | $L$oop_tail8x: | 
 | 	movzx	eax,BYTE[r10*1+rsi] | 
 | 	movzx	ecx,BYTE[r10*1+rsp] | 
 | 	lea	r10,[1+r10] | 
 | 	xor	eax,ecx | 
 | 	mov	BYTE[((-1))+r10*1+rdi],al | 
 | 	dec	rdx | 
 | 	jnz	NEAR $L$oop_tail8x | 
 |  | 
 | $L$done8x: | 
 | 	vzeroall | 
 | 	movaps	xmm6,XMMWORD[((-168))+r9] | 
 | 	movaps	xmm7,XMMWORD[((-152))+r9] | 
 | 	movaps	xmm8,XMMWORD[((-136))+r9] | 
 | 	movaps	xmm9,XMMWORD[((-120))+r9] | 
 | 	movaps	xmm10,XMMWORD[((-104))+r9] | 
 | 	movaps	xmm11,XMMWORD[((-88))+r9] | 
 | 	movaps	xmm12,XMMWORD[((-72))+r9] | 
 | 	movaps	xmm13,XMMWORD[((-56))+r9] | 
 | 	movaps	xmm14,XMMWORD[((-40))+r9] | 
 | 	movaps	xmm15,XMMWORD[((-24))+r9] | 
 | 	lea	rsp,[r9] | 
 |  | 
 | $L$8x_epilogue: | 
 | 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue | 
 | 	mov	rsi,QWORD[16+rsp] | 
 | 	ret | 
 |  | 
 | $L$SEH_end_ChaCha20_ctr32_avx2: | 
 | EXTERN	__imp_RtlVirtualUnwind | 
 |  | 
 | ALIGN	16 | 
 | se_handler: | ; Win64 SEH language handler; the .xdata entry for ChaCha20_ctr32_nohw names it.
 ; r8 and r9 are used as base pointers for two records. Field names in the
 ; comments below assume the standard winnt.h layouts (r8 = CONTEXT*,
 ; r9 = DISPATCHER_CONTEXT*) -- NOTE(review): confirm offsets against the SDK.
 ; Flow: optionally recover rbx/rbp/r12-r15 from the interrupted frame, then fall
 ; into $L$common_seh_tail, which ssse3_handler and full_handler also jump to.
 ; Always returns 1 in eax.
 | 	push	rsi | 
 | 	push	rdi | 
 | 	push	rbx | 
 | 	push	rbp | 
 | 	push	r12 | 
 | 	push	r13 | 
 | 	push	r14 | 
 | 	push	r15 | 
 | 	pushfq | 
 | 	sub	rsp,64 | ; scratch area; [32+rsp]..[56+rsp] carry stack arguments for the call below
 |  | 
 | 	mov	rax,QWORD[120+r8] | ; context->Rax
 | 	mov	rbx,QWORD[248+r8] | ; context->Rip (address being unwound)
 |  | 
 | 	mov	rsi,QWORD[8+r9] | ; disp->ImageBase
 | 	mov	r11,QWORD[56+r9] | ; disp->HandlerData
 |  | ; (rsi and r11 are both overwritten further down before this handler reads them)
 | 	lea	r10,[$L$ctr32_body] | ; label defined outside this chunk
 | 	cmp	rbx,r10 | 
 | 	jb	NEAR $L$common_seh_tail | ; Rip below the body label: keep rax = context->Rax as frame base
 |  | 
 | 	mov	rax,QWORD[152+r8] | ; context->Rsp
 |  | 
 | 	lea	r10,[$L$no_data] | ; label defined outside this chunk
 | 	cmp	rbx,r10 | 
 | 	jae	NEAR $L$common_seh_tail | ; Rip at/after $L$no_data: use context->Rsp unchanged
 |  | 
 | 	lea	rax,[((64+24+48))+rax] | ; skip the 64+24 bytes of locals and six 8-byte pushes made by the nohw prologue
 |  | 
 | 	mov	rbx,QWORD[((-8))+rax] | ; saved registers, in the order the prologue pushed them
 | 	mov	rbp,QWORD[((-16))+rax] | 
 | 	mov	r12,QWORD[((-24))+rax] | 
 | 	mov	r13,QWORD[((-32))+rax] | 
 | 	mov	r14,QWORD[((-40))+rax] | 
 | 	mov	r15,QWORD[((-48))+rax] | 
 | 	mov	QWORD[144+r8],rbx | ; context->Rbx
 | 	mov	QWORD[160+r8],rbp | ; context->Rbp
 | 	mov	QWORD[216+r8],r12 | ; context->R12
 | 	mov	QWORD[224+r8],r13 | ; context->R13
 | 	mov	QWORD[232+r8],r14 | ; context->R14
 | 	mov	QWORD[240+r8],r15 | ; context->R15
 |  | 
 | $L$common_seh_tail: | ; shared exit: expects rax = frame base, r8/r9 as on entry, and the push layout above
 | 	mov	rdi,QWORD[8+rax] | ; the WIN64 prologue stored rdi at [8+rsp] ...
 | 	mov	rsi,QWORD[16+rax] | ; ... and rsi at [16+rsp]
 | 	mov	QWORD[152+r8],rax | ; context->Rsp
 | 	mov	QWORD[168+r8],rsi | ; context->Rsi
 | 	mov	QWORD[176+r8],rdi | ; context->Rdi
 |  | 
 | 	mov	rdi,QWORD[40+r9] | ; destination: disp->ContextRecord
 | 	mov	rsi,r8 | ; source: the CONTEXT updated above
 | 	mov	ecx,154 | ; 154 qwords = 1232 bytes, presumably sizeof(CONTEXT)
 | 	DD	0xa548f3fc | ; bytes FC F3 48 A5 = cld; rep movsq
 |  | 
 | 	mov	rsi,r9 | ; argument setup for RtlVirtualUnwind (names per its documented prototype)
 | 	xor	rcx,rcx | ; arg1 HandlerType = 0
 | 	mov	rdx,QWORD[8+rsi] | ; arg2 = disp->ImageBase
 | 	mov	r8,QWORD[rsi] | ; arg3 = disp->ControlPc
 | 	mov	r9,QWORD[16+rsi] | ; arg4 = disp->FunctionEntry
 | 	mov	r10,QWORD[40+rsi] | ; disp->ContextRecord
 | 	lea	r11,[56+rsi] | ; &disp->HandlerData
 | 	lea	r12,[24+rsi] | ; &disp->EstablisherFrame
 | 	mov	QWORD[32+rsp],r10 | ; arg5
 | 	mov	QWORD[40+rsp],r11 | ; arg6
 | 	mov	QWORD[48+rsp],r12 | ; arg7
 | 	mov	QWORD[56+rsp],rcx | ; arg8 = 0 (NULL)
 | 	call	QWORD[__imp_RtlVirtualUnwind] | 
 |  | 
 | 	mov	eax,1 | ; return value 1 (ExceptionContinueSearch in the Win64 handler convention)
 | 	add	rsp,64 | 
 | 	popfq | 
 | 	pop	r15 | 
 | 	pop	r14 | 
 | 	pop	r13 | 
 | 	pop	r12 | 
 | 	pop	rbp | 
 | 	pop	rbx | 
 | 	pop	rdi | 
 | 	pop	rsi | 
 | 	ret | 
 |  | 
 |  | 
 |  | 
 | ALIGN	16 | 
 | ssse3_handler: | ; Win64 SEH handler named by the ChaCha20_ctr32_ssse3 unwind info in .xdata.
 ; Uses the same register-save sequence as se_handler and leaves on every path
 ; through $L$common_seh_tail, which does the CONTEXT copy, the unwind call and
 ; the matching pops. HandlerData[0] and HandlerData[1] are image-relative
 ; addresses (the .xdata entry lists $L$ssse3_body and $L$ssse3_epilogue).
 ; NOTE(review): field names assume the standard winnt.h CONTEXT (r8) and
 ; DISPATCHER_CONTEXT (r9) layouts -- confirm offsets against the SDK.
 | 	push	rsi | 
 | 	push	rdi | 
 | 	push	rbx | 
 | 	push	rbp | 
 | 	push	r12 | 
 | 	push	r13 | 
 | 	push	r14 | 
 | 	push	r15 | 
 | 	pushfq | 
 | 	sub	rsp,64 | ; same frame shape that $L$common_seh_tail tears down
 |  | 
 | 	mov	rax,QWORD[120+r8] | ; context->Rax
 | 	mov	rbx,QWORD[248+r8] | ; context->Rip
 |  | 
 | 	mov	rsi,QWORD[8+r9] | ; disp->ImageBase
 | 	mov	r11,QWORD[56+r9] | ; disp->HandlerData
 |  | 
 | 	mov	r10d,DWORD[r11] | ; HandlerData[0]: image-relative start of the body
 | 	lea	r10,[r10*1+rsi] | ; convert to an absolute address
 | 	cmp	rbx,r10 | 
 | 	jb	NEAR $L$common_seh_tail | ; Rip before the body: frame base stays context->Rax
 |  | 
 | 	mov	rax,QWORD[192+r8] | ; context->R9; presumably the function keeps its entry rsp there -- body not in this chunk
 |  | 
 | 	mov	r10d,DWORD[4+r11] | ; HandlerData[1]: image-relative epilogue label
 | 	lea	r10,[r10*1+rsi] | 
 | 	cmp	rbx,r10 | 
 | 	jae	NEAR $L$common_seh_tail | ; Rip at/after the epilogue label: skip the xmm copy
 |  | 
 | 	lea	rsi,[((-40))+rax] | ; source: 32 bytes starting 40 below the frame base
 | 	lea	rdi,[512+r8] | ; destination: context->Xmm6
 | 	mov	ecx,4 | ; 4 qwords = two 16-byte registers (Xmm6, Xmm7)
 | 	DD	0xa548f3fc | ; bytes FC F3 48 A5 = cld; rep movsq
 |  | 
 | 	jmp	NEAR $L$common_seh_tail | 
 |  | 
 |  | 
 |  | 
 | ALIGN	16 | 
 | full_handler: | ; Win64 SEH handler named by the ssse3_4x and avx2 unwind info entries in .xdata.
 ; Same structure as ssse3_handler, but it copies 20 qwords (ten 16-byte
 ; registers) from [frame-168]; the avx2 epilogue reloads xmm6..xmm15 from
 ; -168..-24 relative to r9, which matches this source range.
 ; Every path leaves through $L$common_seh_tail in se_handler.
 ; NOTE(review): field names assume the standard winnt.h CONTEXT (r8) and
 ; DISPATCHER_CONTEXT (r9) layouts -- confirm offsets against the SDK.
 | 	push	rsi | 
 | 	push	rdi | 
 | 	push	rbx | 
 | 	push	rbp | 
 | 	push	r12 | 
 | 	push	r13 | 
 | 	push	r14 | 
 | 	push	r15 | 
 | 	pushfq | 
 | 	sub	rsp,64 | ; same frame shape that $L$common_seh_tail tears down
 |  | 
 | 	mov	rax,QWORD[120+r8] | ; context->Rax
 | 	mov	rbx,QWORD[248+r8] | ; context->Rip
 |  | 
 | 	mov	rsi,QWORD[8+r9] | ; disp->ImageBase
 | 	mov	r11,QWORD[56+r9] | ; disp->HandlerData
 |  | 
 | 	mov	r10d,DWORD[r11] | ; HandlerData[0]: image-relative body label
 | 	lea	r10,[r10*1+rsi] | ; convert to an absolute address
 | 	cmp	rbx,r10 | 
 | 	jb	NEAR $L$common_seh_tail | ; Rip before the body: frame base stays context->Rax
 |  | 
 | 	mov	rax,QWORD[192+r8] | ; context->R9 (the avx2 epilogue restores rsp from r9)
 |  | 
 | 	mov	r10d,DWORD[4+r11] | ; HandlerData[1]: image-relative epilogue label
 | 	lea	r10,[r10*1+rsi] | 
 | 	cmp	rbx,r10 | 
 | 	jae	NEAR $L$common_seh_tail | ; Rip at/after the epilogue label: skip the xmm copy
 |  | 
 | 	lea	rsi,[((-168))+rax] | ; source: saved xmm area, 168 bytes below the frame base
 | 	lea	rdi,[512+r8] | ; destination: context->Xmm6
 | 	mov	ecx,20 | ; 20 qwords = 160 bytes = Xmm6..Xmm15
 | 	DD	0xa548f3fc | ; bytes FC F3 48 A5 = cld; rep movsq
 |  | 
 | 	jmp	NEAR $L$common_seh_tail | 
 |  | 
 |  | 
 | section	.pdata rdata align=4 | ; function table: one (begin, end, unwind-info) triple of image-relative addresses per function
 | ALIGN	4 | 
 | 	DD	$L$SEH_begin_ChaCha20_ctr32_nohw wrt ..imagebase | 
 | 	DD	$L$SEH_end_ChaCha20_ctr32_nohw wrt ..imagebase | 
 | 	DD	$L$SEH_info_ChaCha20_ctr32_nohw wrt ..imagebase | 
 |  | 
 | 	DD	$L$SEH_begin_ChaCha20_ctr32_ssse3 wrt ..imagebase | 
 | 	DD	$L$SEH_end_ChaCha20_ctr32_ssse3 wrt ..imagebase | 
 | 	DD	$L$SEH_info_ChaCha20_ctr32_ssse3 wrt ..imagebase | 
 |  | 
 | 	DD	$L$SEH_begin_ChaCha20_ctr32_ssse3_4x wrt ..imagebase | 
 | 	DD	$L$SEH_end_ChaCha20_ctr32_ssse3_4x wrt ..imagebase | 
 | 	DD	$L$SEH_info_ChaCha20_ctr32_ssse3_4x wrt ..imagebase | 
 | 	DD	$L$SEH_begin_ChaCha20_ctr32_avx2 wrt ..imagebase | 
 | 	DD	$L$SEH_end_ChaCha20_ctr32_avx2 wrt ..imagebase | 
 | 	DD	$L$SEH_info_ChaCha20_ctr32_avx2 wrt ..imagebase | 
 | section	.xdata rdata align=8 | ; unwind info records referenced by the third field of each .pdata triple
 | ALIGN	8 | 
 | $L$SEH_info_ChaCha20_ctr32_nohw: | 
 | 	DB	9,0,0,0 | ; presumably UNWIND_INFO header: version 1 + handler flag, no prolog bytes or unwind codes -- confirm
 | 	DD	se_handler wrt ..imagebase | ; handler; no handler data follows (se_handler compares against fixed labels)
 |  | 
 | $L$SEH_info_ChaCha20_ctr32_ssse3: | 
 | 	DB	9,0,0,0 | 
 | 	DD	ssse3_handler wrt ..imagebase | 
 | 	DD	$L$ssse3_body wrt ..imagebase,$L$ssse3_epilogue wrt ..imagebase | ; HandlerData[0], HandlerData[1] read by ssse3_handler
 |  | 
 | $L$SEH_info_ChaCha20_ctr32_ssse3_4x: | 
 | 	DB	9,0,0,0 | 
 | 	DD	full_handler wrt ..imagebase | 
 | 	DD	$L$4x_body wrt ..imagebase,$L$4x_epilogue wrt ..imagebase | ; HandlerData[0], HandlerData[1] read by full_handler
 | $L$SEH_info_ChaCha20_ctr32_avx2: | 
 | 	DB	9,0,0,0 | 
 | 	DD	full_handler wrt ..imagebase | 
 | 	DD	$L$8x_body wrt ..imagebase,$L$8x_epilogue wrt ..imagebase | ; HandlerData[0], HandlerData[1] read by full_handler
 | %else | 
 | ; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738 | 
 | ret | 
 | %endif |