; This file is generated from a similarly-named Perl script in the BoringSSL
; source tree. Do not edit by hand.
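;
; AES-GCM-SIV helper routines for x86-64 (NASM, Windows x64 ABI): POLYVAL
; arithmetic, AES-128/256 key expansion, the GCM-SIV KDF, and CTR-mode
; encryption/decryption, built on the AES-NI, PCLMULQDQ and AVX instruction
; sets.  In the comments below, argN denotes the Nth C argument after the
; Win64-to-SysV register shim at each entry point (arg1 = rdi, arg2 = rsi,
; arg3 = rdx, arg4 = rcx, arg5 = r8, arg6 = r9).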
 |  | 
 | %ifidn __OUTPUT_FORMAT__, win64 | 
 | default	rel | 
 | %define XMMWORD | 
 | %define YMMWORD | 
 | %define ZMMWORD | 
 | %define _CET_ENDBR | 
 |  | 
 | %ifdef BORINGSSL_PREFIX | 
 | %include "boringssl_prefix_symbols_nasm.inc" | 
 | %endif | 
 | section	.rdata rdata align=8 | 
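; Constant pool used below: one..eight are little-endian 128-bit increments
; for stepping CTR counter blocks; OR_MASK sets the top bit of the counter
; block as GCM-SIV requires; poly holds the POLYVAL/GHASH reduction constant;
; mask, con1, con2 and con3 are the byte-shuffle patterns and round constants
; used by the AES key-schedule core; and_mask clears the 32-bit lane used as
; the KDF block counter.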
 |  | 
 | ALIGN	16 | 
 | one: | 
 | 	DQ	1,0 | 
 | two: | 
 | 	DQ	2,0 | 
 | three: | 
 | 	DQ	3,0 | 
 | four: | 
 | 	DQ	4,0 | 
 | five: | 
 | 	DQ	5,0 | 
 | six: | 
 | 	DQ	6,0 | 
 | seven: | 
 | 	DQ	7,0 | 
 | eight: | 
 | 	DQ	8,0 | 
 |  | 
 | OR_MASK: | 
 | 	DD	0x00000000,0x00000000,0x00000000,0x80000000 | 
 | poly: | 
 | 	DQ	0x1,0xc200000000000000 | 
 | mask: | 
 | 	DD	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d | 
 | con1: | 
 | 	DD	1,1,1,1 | 
 | con2: | 
 | 	DD	0x1b,0x1b,0x1b,0x1b | 
 | con3: | 
 | 	DB	-1,-1,-1,-1,-1,-1,-1,-1,4,5,6,7,4,5,6,7 | 
 | and_mask: | 
 | 	DD	0,0xffffffff,0xffffffff,0xffffffff | 
 | section	.text code align=64 | 
 |  | 
 |  | 
 | ALIGN	16 | 
 | GFMUL: | 
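; GFMUL: xmm0 <- xmm0 * xmm1 in GF(2^128), reduced modulo the POLYVAL
; polynomial at [poly].  Four VPCLMULQDQ partial products are combined and
; then reduced in two folding steps.  Clobbers xmm2..xmm5; xmm1 is preserved.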
 |  | 
 | 	vpclmulqdq	xmm2,xmm0,xmm1,0x00 | 
 | 	vpclmulqdq	xmm5,xmm0,xmm1,0x11 | 
 | 	vpclmulqdq	xmm3,xmm0,xmm1,0x10 | 
 | 	vpclmulqdq	xmm4,xmm0,xmm1,0x01 | 
 | 	vpxor	xmm3,xmm3,xmm4 | 
 | 	vpslldq	xmm4,xmm3,8 | 
 | 	vpsrldq	xmm3,xmm3,8 | 
 | 	vpxor	xmm2,xmm2,xmm4 | 
 | 	vpxor	xmm5,xmm5,xmm3 | 
 |  | 
 | 	vpclmulqdq	xmm3,xmm2,XMMWORD[poly],0x10 | 
 | 	vpshufd	xmm4,xmm2,78 | 
 | 	vpxor	xmm2,xmm3,xmm4 | 
 |  | 
 | 	vpclmulqdq	xmm3,xmm2,XMMWORD[poly],0x10 | 
 | 	vpshufd	xmm4,xmm2,78 | 
 | 	vpxor	xmm2,xmm3,xmm4 | 
 |  | 
 | 	vpxor	xmm0,xmm2,xmm5 | 
 | 	ret | 
 |  | 
 |  | 
 | global	aesgcmsiv_htable_init | 
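; aesgcmsiv_htable_init: read the 16-byte POLYVAL key H from [arg2 = rsi] and
; write the eight powers H^1..H^8 (computed with GFMUL) into the 128-byte
; table at [arg1 = rdi].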
 |  | 
 | ALIGN	16 | 
 | aesgcmsiv_htable_init: | 
 | 	mov	QWORD[8+rsp],rdi	;WIN64 prologue | 
 | 	mov	QWORD[16+rsp],rsi | 
 | 	mov	rax,rsp | 
 | $L$SEH_begin_aesgcmsiv_htable_init: | 
 | 	mov	rdi,rcx | 
 | 	mov	rsi,rdx | 
 |  | 
 |  | 
 |  | 
 | _CET_ENDBR | 
 | 	vmovdqa	xmm0,XMMWORD[rsi] | 
 | 	vmovdqa	xmm1,xmm0 | 
 | 	vmovdqa	XMMWORD[rdi],xmm0 | 
 | 	call	GFMUL | 
 | 	vmovdqa	XMMWORD[16+rdi],xmm0 | 
 | 	call	GFMUL | 
 | 	vmovdqa	XMMWORD[32+rdi],xmm0 | 
 | 	call	GFMUL | 
 | 	vmovdqa	XMMWORD[48+rdi],xmm0 | 
 | 	call	GFMUL | 
 | 	vmovdqa	XMMWORD[64+rdi],xmm0 | 
 | 	call	GFMUL | 
 | 	vmovdqa	XMMWORD[80+rdi],xmm0 | 
 | 	call	GFMUL | 
 | 	vmovdqa	XMMWORD[96+rdi],xmm0 | 
 | 	call	GFMUL | 
 | 	vmovdqa	XMMWORD[112+rdi],xmm0 | 
 | 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue | 
 | 	mov	rsi,QWORD[16+rsp] | 
 | 	ret | 
 |  | 
 | $L$SEH_end_aesgcmsiv_htable_init: | 
 | global	aesgcmsiv_htable6_init | 
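; aesgcmsiv_htable6_init: same as aesgcmsiv_htable_init, but stores only the
; six powers H^1..H^6 into the 96-byte table at [arg1 = rdi].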
 |  | 
 | ALIGN	16 | 
 | aesgcmsiv_htable6_init: | 
 | 	mov	QWORD[8+rsp],rdi	;WIN64 prologue | 
 | 	mov	QWORD[16+rsp],rsi | 
 | 	mov	rax,rsp | 
 | $L$SEH_begin_aesgcmsiv_htable6_init: | 
 | 	mov	rdi,rcx | 
 | 	mov	rsi,rdx | 
 |  | 
 |  | 
 |  | 
 | _CET_ENDBR | 
 | 	vmovdqa	xmm0,XMMWORD[rsi] | 
 | 	vmovdqa	xmm1,xmm0 | 
 | 	vmovdqa	XMMWORD[rdi],xmm0 | 
 | 	call	GFMUL | 
 | 	vmovdqa	XMMWORD[16+rdi],xmm0 | 
 | 	call	GFMUL | 
 | 	vmovdqa	XMMWORD[32+rdi],xmm0 | 
 | 	call	GFMUL | 
 | 	vmovdqa	XMMWORD[48+rdi],xmm0 | 
 | 	call	GFMUL | 
 | 	vmovdqa	XMMWORD[64+rdi],xmm0 | 
 | 	call	GFMUL | 
 | 	vmovdqa	XMMWORD[80+rdi],xmm0 | 
 | 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue | 
 | 	mov	rsi,QWORD[16+rsp] | 
 | 	ret | 
 |  | 
 | $L$SEH_end_aesgcmsiv_htable6_init: | 
 | global	aesgcmsiv_htable_polyval | 
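; aesgcmsiv_htable_polyval: fold arg3 = rdx bytes of input (a whole number of
; 16-byte blocks) at [arg2 = rsi] into the 16-byte POLYVAL accumulator at
; [arg4 = rcx], using the precomputed powers of H at [arg1 = rdi].  Any
; len%128 prefix is handled first; the main loop then consumes 128 bytes
; (eight blocks) per iteration with a deferred reduction.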
 |  | 
 | ALIGN	16 | 
 | aesgcmsiv_htable_polyval: | 
 | 	mov	QWORD[8+rsp],rdi	;WIN64 prologue | 
 | 	mov	QWORD[16+rsp],rsi | 
 | 	mov	rax,rsp | 
 | $L$SEH_begin_aesgcmsiv_htable_polyval: | 
 | 	mov	rdi,rcx | 
 | 	mov	rsi,rdx | 
 | 	mov	rdx,r8 | 
 | 	mov	rcx,r9 | 
 |  | 
 |  | 
 |  | 
 | _CET_ENDBR | 
 | 	test	rdx,rdx | 
 | 	jnz	NEAR $L$htable_polyval_start | 
 | 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue | 
 | 	mov	rsi,QWORD[16+rsp] | 
 | 	ret | 
 |  | 
 | $L$htable_polyval_start: | 
 | 	vzeroall | 
 |  | 
 |  | 
 |  | 
 | 	mov	r11,rdx | 
 | 	and	r11,127 | 
 |  | 
 | 	jz	NEAR $L$htable_polyval_no_prefix | 
 |  | 
 | 	vpxor	xmm9,xmm9,xmm9 | 
 | 	vmovdqa	xmm1,XMMWORD[rcx] | 
 | 	sub	rdx,r11 | 
 |  | 
 | 	sub	r11,16 | 
 |  | 
 |  | 
 | 	vmovdqu	xmm0,XMMWORD[rsi] | 
 | 	vpxor	xmm0,xmm0,xmm1 | 
 |  | 
 | 	vpclmulqdq	xmm5,xmm0,XMMWORD[r11*1+rdi],0x01 | 
 | 	vpclmulqdq	xmm3,xmm0,XMMWORD[r11*1+rdi],0x00 | 
 | 	vpclmulqdq	xmm4,xmm0,XMMWORD[r11*1+rdi],0x11 | 
 | 	vpclmulqdq	xmm6,xmm0,XMMWORD[r11*1+rdi],0x10 | 
 | 	vpxor	xmm5,xmm5,xmm6 | 
 |  | 
 | 	lea	rsi,[16+rsi] | 
 | 	test	r11,r11 | 
 | 	jnz	NEAR $L$htable_polyval_prefix_loop | 
 | 	jmp	NEAR $L$htable_polyval_prefix_complete | 
 |  | 
 |  | 
 | ALIGN	64 | 
 | $L$htable_polyval_prefix_loop: | 
 | 	sub	r11,16 | 
 |  | 
 | 	vmovdqu	xmm0,XMMWORD[rsi] | 
 |  | 
 | 	vpclmulqdq	xmm6,xmm0,XMMWORD[r11*1+rdi],0x00 | 
 | 	vpxor	xmm3,xmm3,xmm6 | 
 | 	vpclmulqdq	xmm6,xmm0,XMMWORD[r11*1+rdi],0x11 | 
 | 	vpxor	xmm4,xmm4,xmm6 | 
 | 	vpclmulqdq	xmm6,xmm0,XMMWORD[r11*1+rdi],0x01 | 
 | 	vpxor	xmm5,xmm5,xmm6 | 
 | 	vpclmulqdq	xmm6,xmm0,XMMWORD[r11*1+rdi],0x10 | 
 | 	vpxor	xmm5,xmm5,xmm6 | 
 |  | 
 | 	test	r11,r11 | 
 |  | 
 | 	lea	rsi,[16+rsi] | 
 |  | 
 | 	jnz	NEAR $L$htable_polyval_prefix_loop | 
 |  | 
 | $L$htable_polyval_prefix_complete: | 
 | 	vpsrldq	xmm6,xmm5,8 | 
 | 	vpslldq	xmm5,xmm5,8 | 
 |  | 
 | 	vpxor	xmm9,xmm4,xmm6 | 
 | 	vpxor	xmm1,xmm3,xmm5 | 
 |  | 
 | 	jmp	NEAR $L$htable_polyval_main_loop | 
 |  | 
 | $L$htable_polyval_no_prefix: | 
 |  | 
 |  | 
 |  | 
 |  | 
 | 	vpxor	xmm1,xmm1,xmm1 | 
 | 	vmovdqa	xmm9,XMMWORD[rcx] | 
 |  | 
 | ALIGN	64 | 
 | $L$htable_polyval_main_loop: | 
 | 	sub	rdx,0x80 | 
 | 	jb	NEAR $L$htable_polyval_out | 
 |  | 
 | 	vmovdqu	xmm0,XMMWORD[112+rsi] | 
 |  | 
 | 	vpclmulqdq	xmm5,xmm0,XMMWORD[rdi],0x01 | 
 | 	vpclmulqdq	xmm3,xmm0,XMMWORD[rdi],0x00 | 
 | 	vpclmulqdq	xmm4,xmm0,XMMWORD[rdi],0x11 | 
 | 	vpclmulqdq	xmm6,xmm0,XMMWORD[rdi],0x10 | 
 | 	vpxor	xmm5,xmm5,xmm6 | 
 |  | 
 |  | 
 | 	vmovdqu	xmm0,XMMWORD[96+rsi] | 
 | 	vpclmulqdq	xmm6,xmm0,XMMWORD[16+rdi],0x01 | 
 | 	vpxor	xmm5,xmm5,xmm6 | 
 | 	vpclmulqdq	xmm6,xmm0,XMMWORD[16+rdi],0x00 | 
 | 	vpxor	xmm3,xmm3,xmm6 | 
 | 	vpclmulqdq	xmm6,xmm0,XMMWORD[16+rdi],0x11 | 
 | 	vpxor	xmm4,xmm4,xmm6 | 
 | 	vpclmulqdq	xmm6,xmm0,XMMWORD[16+rdi],0x10 | 
 | 	vpxor	xmm5,xmm5,xmm6 | 
 |  | 
 |  | 
 |  | 
 | 	vmovdqu	xmm0,XMMWORD[80+rsi] | 
 |  | 
 | 	vpclmulqdq	xmm7,xmm1,XMMWORD[poly],0x10 | 
 | 	vpalignr	xmm1,xmm1,xmm1,8 | 
 |  | 
 | 	vpclmulqdq	xmm6,xmm0,XMMWORD[32+rdi],0x01 | 
 | 	vpxor	xmm5,xmm5,xmm6 | 
 | 	vpclmulqdq	xmm6,xmm0,XMMWORD[32+rdi],0x00 | 
 | 	vpxor	xmm3,xmm3,xmm6 | 
 | 	vpclmulqdq	xmm6,xmm0,XMMWORD[32+rdi],0x11 | 
 | 	vpxor	xmm4,xmm4,xmm6 | 
 | 	vpclmulqdq	xmm6,xmm0,XMMWORD[32+rdi],0x10 | 
 | 	vpxor	xmm5,xmm5,xmm6 | 
 |  | 
 |  | 
 | 	vpxor	xmm1,xmm1,xmm7 | 
 |  | 
 | 	vmovdqu	xmm0,XMMWORD[64+rsi] | 
 |  | 
 | 	vpclmulqdq	xmm6,xmm0,XMMWORD[48+rdi],0x01 | 
 | 	vpxor	xmm5,xmm5,xmm6 | 
 | 	vpclmulqdq	xmm6,xmm0,XMMWORD[48+rdi],0x00 | 
 | 	vpxor	xmm3,xmm3,xmm6 | 
 | 	vpclmulqdq	xmm6,xmm0,XMMWORD[48+rdi],0x11 | 
 | 	vpxor	xmm4,xmm4,xmm6 | 
 | 	vpclmulqdq	xmm6,xmm0,XMMWORD[48+rdi],0x10 | 
 | 	vpxor	xmm5,xmm5,xmm6 | 
 |  | 
 |  | 
 | 	vmovdqu	xmm0,XMMWORD[48+rsi] | 
 |  | 
 | 	vpclmulqdq	xmm7,xmm1,XMMWORD[poly],0x10 | 
 | 	vpalignr	xmm1,xmm1,xmm1,8 | 
 |  | 
 | 	vpclmulqdq	xmm6,xmm0,XMMWORD[64+rdi],0x01 | 
 | 	vpxor	xmm5,xmm5,xmm6 | 
 | 	vpclmulqdq	xmm6,xmm0,XMMWORD[64+rdi],0x00 | 
 | 	vpxor	xmm3,xmm3,xmm6 | 
 | 	vpclmulqdq	xmm6,xmm0,XMMWORD[64+rdi],0x11 | 
 | 	vpxor	xmm4,xmm4,xmm6 | 
 | 	vpclmulqdq	xmm6,xmm0,XMMWORD[64+rdi],0x10 | 
 | 	vpxor	xmm5,xmm5,xmm6 | 
 |  | 
 |  | 
 | 	vpxor	xmm1,xmm1,xmm7 | 
 |  | 
 | 	vmovdqu	xmm0,XMMWORD[32+rsi] | 
 |  | 
 | 	vpclmulqdq	xmm6,xmm0,XMMWORD[80+rdi],0x01 | 
 | 	vpxor	xmm5,xmm5,xmm6 | 
 | 	vpclmulqdq	xmm6,xmm0,XMMWORD[80+rdi],0x00 | 
 | 	vpxor	xmm3,xmm3,xmm6 | 
 | 	vpclmulqdq	xmm6,xmm0,XMMWORD[80+rdi],0x11 | 
 | 	vpxor	xmm4,xmm4,xmm6 | 
 | 	vpclmulqdq	xmm6,xmm0,XMMWORD[80+rdi],0x10 | 
 | 	vpxor	xmm5,xmm5,xmm6 | 
 |  | 
 |  | 
 | 	vpxor	xmm1,xmm1,xmm9 | 
 |  | 
 | 	vmovdqu	xmm0,XMMWORD[16+rsi] | 
 |  | 
 | 	vpclmulqdq	xmm6,xmm0,XMMWORD[96+rdi],0x01 | 
 | 	vpxor	xmm5,xmm5,xmm6 | 
 | 	vpclmulqdq	xmm6,xmm0,XMMWORD[96+rdi],0x00 | 
 | 	vpxor	xmm3,xmm3,xmm6 | 
 | 	vpclmulqdq	xmm6,xmm0,XMMWORD[96+rdi],0x11 | 
 | 	vpxor	xmm4,xmm4,xmm6 | 
 | 	vpclmulqdq	xmm6,xmm0,XMMWORD[96+rdi],0x10 | 
 | 	vpxor	xmm5,xmm5,xmm6 | 
 |  | 
 |  | 
 | 	vmovdqu	xmm0,XMMWORD[rsi] | 
 | 	vpxor	xmm0,xmm0,xmm1 | 
 |  | 
 | 	vpclmulqdq	xmm6,xmm0,XMMWORD[112+rdi],0x01 | 
 | 	vpxor	xmm5,xmm5,xmm6 | 
 | 	vpclmulqdq	xmm6,xmm0,XMMWORD[112+rdi],0x00 | 
 | 	vpxor	xmm3,xmm3,xmm6 | 
 | 	vpclmulqdq	xmm6,xmm0,XMMWORD[112+rdi],0x11 | 
 | 	vpxor	xmm4,xmm4,xmm6 | 
 | 	vpclmulqdq	xmm6,xmm0,XMMWORD[112+rdi],0x10 | 
 | 	vpxor	xmm5,xmm5,xmm6 | 
 |  | 
 |  | 
 | 	vpsrldq	xmm6,xmm5,8 | 
 | 	vpslldq	xmm5,xmm5,8 | 
 |  | 
 | 	vpxor	xmm9,xmm4,xmm6 | 
 | 	vpxor	xmm1,xmm3,xmm5 | 
 |  | 
 | 	lea	rsi,[128+rsi] | 
 | 	jmp	NEAR $L$htable_polyval_main_loop | 
 |  | 
 |  | 
 |  | 
 | $L$htable_polyval_out: | 
 | 	vpclmulqdq	xmm6,xmm1,XMMWORD[poly],0x10 | 
 | 	vpalignr	xmm1,xmm1,xmm1,8 | 
 | 	vpxor	xmm1,xmm1,xmm6 | 
 |  | 
 | 	vpclmulqdq	xmm6,xmm1,XMMWORD[poly],0x10 | 
 | 	vpalignr	xmm1,xmm1,xmm1,8 | 
 | 	vpxor	xmm1,xmm1,xmm6 | 
 | 	vpxor	xmm1,xmm1,xmm9 | 
 |  | 
 | 	vmovdqu	XMMWORD[rcx],xmm1 | 
 | 	vzeroupper | 
 | 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue | 
 | 	mov	rsi,QWORD[16+rsp] | 
 | 	ret | 
 |  | 
 | $L$SEH_end_aesgcmsiv_htable_polyval: | 
 | global	aesgcmsiv_polyval_horner | 
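; aesgcmsiv_polyval_horner: Horner-style POLYVAL update.  For each of the
; arg4 = rcx 16-byte blocks at [arg3 = rdx], the accumulator at [arg1 = rdi]
; is XORed with the block and multiplied by H (loaded from [arg2 = rsi]) via
; GFMUL; the result is written back to [arg1].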
 |  | 
 | ALIGN	16 | 
 | aesgcmsiv_polyval_horner: | 
 | 	mov	QWORD[8+rsp],rdi	;WIN64 prologue | 
 | 	mov	QWORD[16+rsp],rsi | 
 | 	mov	rax,rsp | 
 | $L$SEH_begin_aesgcmsiv_polyval_horner: | 
 | 	mov	rdi,rcx | 
 | 	mov	rsi,rdx | 
 | 	mov	rdx,r8 | 
 | 	mov	rcx,r9 | 
 |  | 
 |  | 
 |  | 
 | _CET_ENDBR | 
 | 	test	rcx,rcx | 
 | 	jnz	NEAR $L$polyval_horner_start | 
 | 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue | 
 | 	mov	rsi,QWORD[16+rsp] | 
 | 	ret | 
 |  | 
 | $L$polyval_horner_start: | 
 |  | 
 |  | 
 |  | 
 | 	xor	r10,r10 | 
 | 	shl	rcx,4 | 
 |  | 
 | 	vmovdqa	xmm1,XMMWORD[rsi] | 
 | 	vmovdqa	xmm0,XMMWORD[rdi] | 
 |  | 
 | $L$polyval_horner_loop: | 
 | 	vpxor	xmm0,xmm0,XMMWORD[r10*1+rdx] | 
 | 	call	GFMUL | 
 |  | 
 | 	add	r10,16 | 
 | 	cmp	rcx,r10 | 
 | 	jne	NEAR $L$polyval_horner_loop | 
 |  | 
 |  | 
 | 	vmovdqa	XMMWORD[rdi],xmm0 | 
 | 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue | 
 | 	mov	rsi,QWORD[16+rsp] | 
 | 	ret | 
 |  | 
 | $L$SEH_end_aesgcmsiv_polyval_horner: | 
 | global	aes128gcmsiv_aes_ks | 
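; aes128gcmsiv_aes_ks: expand the 16-byte AES-128 key at [arg1 = rdi] into
; the 11 round keys (176 bytes) at [arg2 = rsi], using VAESENCLAST plus the
; shuffle and round constants above in place of AESKEYGENASSIST.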
 |  | 
 | ALIGN	16 | 
 | aes128gcmsiv_aes_ks: | 
 | 	mov	QWORD[8+rsp],rdi	;WIN64 prologue | 
 | 	mov	QWORD[16+rsp],rsi | 
 | 	mov	rax,rsp | 
 | $L$SEH_begin_aes128gcmsiv_aes_ks: | 
 | 	mov	rdi,rcx | 
 | 	mov	rsi,rdx | 
 |  | 
 |  | 
 |  | 
 | _CET_ENDBR | 
 | 	vmovdqu	xmm1,XMMWORD[rdi] | 
 | 	vmovdqa	XMMWORD[rsi],xmm1 | 
 |  | 
 | 	vmovdqa	xmm0,XMMWORD[con1] | 
 | 	vmovdqa	xmm15,XMMWORD[mask] | 
 |  | 
 | 	mov	rax,8 | 
 |  | 
 | $L$ks128_loop: | 
 | 	add	rsi,16 | 
 | 	sub	rax,1 | 
 | 	vpshufb	xmm2,xmm1,xmm15 | 
 | 	vaesenclast	xmm2,xmm2,xmm0 | 
 | 	vpslld	xmm0,xmm0,1 | 
 | 	vpslldq	xmm3,xmm1,4 | 
 | 	vpxor	xmm1,xmm1,xmm3 | 
 | 	vpslldq	xmm3,xmm3,4 | 
 | 	vpxor	xmm1,xmm1,xmm3 | 
 | 	vpslldq	xmm3,xmm3,4 | 
 | 	vpxor	xmm1,xmm1,xmm3 | 
 | 	vpxor	xmm1,xmm1,xmm2 | 
 | 	vmovdqa	XMMWORD[rsi],xmm1 | 
 | 	jne	NEAR $L$ks128_loop | 
 |  | 
 | 	vmovdqa	xmm0,XMMWORD[con2] | 
 | 	vpshufb	xmm2,xmm1,xmm15 | 
 | 	vaesenclast	xmm2,xmm2,xmm0 | 
 | 	vpslld	xmm0,xmm0,1 | 
 | 	vpslldq	xmm3,xmm1,4 | 
 | 	vpxor	xmm1,xmm1,xmm3 | 
 | 	vpslldq	xmm3,xmm3,4 | 
 | 	vpxor	xmm1,xmm1,xmm3 | 
 | 	vpslldq	xmm3,xmm3,4 | 
 | 	vpxor	xmm1,xmm1,xmm3 | 
 | 	vpxor	xmm1,xmm1,xmm2 | 
 | 	vmovdqa	XMMWORD[16+rsi],xmm1 | 
 |  | 
 | 	vpshufb	xmm2,xmm1,xmm15 | 
 | 	vaesenclast	xmm2,xmm2,xmm0 | 
 | 	vpslldq	xmm3,xmm1,4 | 
 | 	vpxor	xmm1,xmm1,xmm3 | 
 | 	vpslldq	xmm3,xmm3,4 | 
 | 	vpxor	xmm1,xmm1,xmm3 | 
 | 	vpslldq	xmm3,xmm3,4 | 
 | 	vpxor	xmm1,xmm1,xmm3 | 
 | 	vpxor	xmm1,xmm1,xmm2 | 
 | 	vmovdqa	XMMWORD[32+rsi],xmm1 | 
 | 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue | 
 | 	mov	rsi,QWORD[16+rsp] | 
 | 	ret | 
 |  | 
 | $L$SEH_end_aes128gcmsiv_aes_ks: | 
 | global	aes256gcmsiv_aes_ks | 
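; aes256gcmsiv_aes_ks: expand the 32-byte AES-256 key at [arg1 = rdi] into
; the 15 round keys (240 bytes) at [arg2 = rsi].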
 |  | 
 | ALIGN	16 | 
 | aes256gcmsiv_aes_ks: | 
 | 	mov	QWORD[8+rsp],rdi	;WIN64 prologue | 
 | 	mov	QWORD[16+rsp],rsi | 
 | 	mov	rax,rsp | 
 | $L$SEH_begin_aes256gcmsiv_aes_ks: | 
 | 	mov	rdi,rcx | 
 | 	mov	rsi,rdx | 
 |  | 
 |  | 
 |  | 
 | _CET_ENDBR | 
 | 	vmovdqu	xmm1,XMMWORD[rdi] | 
 | 	vmovdqu	xmm3,XMMWORD[16+rdi] | 
 | 	vmovdqa	XMMWORD[rsi],xmm1 | 
 | 	vmovdqa	XMMWORD[16+rsi],xmm3 | 
 | 	vmovdqa	xmm0,XMMWORD[con1] | 
 | 	vmovdqa	xmm15,XMMWORD[mask] | 
 | 	vpxor	xmm14,xmm14,xmm14 | 
 | 	mov	rax,6 | 
 |  | 
 | $L$ks256_loop: | 
 | 	add	rsi,32 | 
 | 	sub	rax,1 | 
 | 	vpshufb	xmm2,xmm3,xmm15 | 
 | 	vaesenclast	xmm2,xmm2,xmm0 | 
 | 	vpslld	xmm0,xmm0,1 | 
 | 	vpsllq	xmm4,xmm1,32 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 | 	vpshufb	xmm4,xmm1,XMMWORD[con3] | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 | 	vpxor	xmm1,xmm1,xmm2 | 
 | 	vmovdqa	XMMWORD[rsi],xmm1 | 
 | 	vpshufd	xmm2,xmm1,0xff | 
 | 	vaesenclast	xmm2,xmm2,xmm14 | 
 | 	vpsllq	xmm4,xmm3,32 | 
 | 	vpxor	xmm3,xmm3,xmm4 | 
 | 	vpshufb	xmm4,xmm3,XMMWORD[con3] | 
 | 	vpxor	xmm3,xmm3,xmm4 | 
 | 	vpxor	xmm3,xmm3,xmm2 | 
 | 	vmovdqa	XMMWORD[16+rsi],xmm3 | 
 | 	jne	NEAR $L$ks256_loop | 
 |  | 
 | 	vpshufb	xmm2,xmm3,xmm15 | 
 | 	vaesenclast	xmm2,xmm2,xmm0 | 
 | 	vpsllq	xmm4,xmm1,32 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 | 	vpshufb	xmm4,xmm1,XMMWORD[con3] | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 | 	vpxor	xmm1,xmm1,xmm2 | 
 | 	vmovdqa	XMMWORD[32+rsi],xmm1 | 
 | 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue | 
 | 	mov	rsi,QWORD[16+rsp] | 
 | 	ret | 
 |  | 
 | global	aes128gcmsiv_aes_ks_enc_x1 | 
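; aes128gcmsiv_aes_ks_enc_x1: expand the 16-byte key at [arg4 = rcx] into the
; 11 round keys at [arg3 = rdx] while simultaneously encrypting the single
; block at [arg1 = rdi]; the ciphertext block is written to [arg2 = rsi].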
 |  | 
 | ALIGN	16 | 
 | aes128gcmsiv_aes_ks_enc_x1: | 
 | 	mov	QWORD[8+rsp],rdi	;WIN64 prologue | 
 | 	mov	QWORD[16+rsp],rsi | 
 | 	mov	rax,rsp | 
 | $L$SEH_begin_aes128gcmsiv_aes_ks_enc_x1: | 
 | 	mov	rdi,rcx | 
 | 	mov	rsi,rdx | 
 | 	mov	rdx,r8 | 
 | 	mov	rcx,r9 | 
 |  | 
 |  | 
 |  | 
 | _CET_ENDBR | 
 | 	vmovdqa	xmm1,XMMWORD[rcx] | 
 | 	vmovdqa	xmm4,XMMWORD[rdi] | 
 |  | 
 | 	vmovdqa	XMMWORD[rdx],xmm1 | 
 | 	vpxor	xmm4,xmm4,xmm1 | 
 |  | 
 | 	vmovdqa	xmm0,XMMWORD[con1] | 
 | 	vmovdqa	xmm15,XMMWORD[mask] | 
 |  | 
 | 	vpshufb	xmm2,xmm1,xmm15 | 
 | 	vaesenclast	xmm2,xmm2,xmm0 | 
 | 	vpslld	xmm0,xmm0,1 | 
 | 	vpsllq	xmm3,xmm1,32 | 
 | 	vpxor	xmm1,xmm1,xmm3 | 
 | 	vpshufb	xmm3,xmm1,XMMWORD[con3] | 
 | 	vpxor	xmm1,xmm1,xmm3 | 
 | 	vpxor	xmm1,xmm1,xmm2 | 
 |  | 
 | 	vaesenc	xmm4,xmm4,xmm1 | 
 | 	vmovdqa	XMMWORD[16+rdx],xmm1 | 
 |  | 
 | 	vpshufb	xmm2,xmm1,xmm15 | 
 | 	vaesenclast	xmm2,xmm2,xmm0 | 
 | 	vpslld	xmm0,xmm0,1 | 
 | 	vpsllq	xmm3,xmm1,32 | 
 | 	vpxor	xmm1,xmm1,xmm3 | 
 | 	vpshufb	xmm3,xmm1,XMMWORD[con3] | 
 | 	vpxor	xmm1,xmm1,xmm3 | 
 | 	vpxor	xmm1,xmm1,xmm2 | 
 |  | 
 | 	vaesenc	xmm4,xmm4,xmm1 | 
 | 	vmovdqa	XMMWORD[32+rdx],xmm1 | 
 |  | 
 | 	vpshufb	xmm2,xmm1,xmm15 | 
 | 	vaesenclast	xmm2,xmm2,xmm0 | 
 | 	vpslld	xmm0,xmm0,1 | 
 | 	vpsllq	xmm3,xmm1,32 | 
 | 	vpxor	xmm1,xmm1,xmm3 | 
 | 	vpshufb	xmm3,xmm1,XMMWORD[con3] | 
 | 	vpxor	xmm1,xmm1,xmm3 | 
 | 	vpxor	xmm1,xmm1,xmm2 | 
 |  | 
 | 	vaesenc	xmm4,xmm4,xmm1 | 
 | 	vmovdqa	XMMWORD[48+rdx],xmm1 | 
 |  | 
 | 	vpshufb	xmm2,xmm1,xmm15 | 
 | 	vaesenclast	xmm2,xmm2,xmm0 | 
 | 	vpslld	xmm0,xmm0,1 | 
 | 	vpsllq	xmm3,xmm1,32 | 
 | 	vpxor	xmm1,xmm1,xmm3 | 
 | 	vpshufb	xmm3,xmm1,XMMWORD[con3] | 
 | 	vpxor	xmm1,xmm1,xmm3 | 
 | 	vpxor	xmm1,xmm1,xmm2 | 
 |  | 
 | 	vaesenc	xmm4,xmm4,xmm1 | 
 | 	vmovdqa	XMMWORD[64+rdx],xmm1 | 
 |  | 
 | 	vpshufb	xmm2,xmm1,xmm15 | 
 | 	vaesenclast	xmm2,xmm2,xmm0 | 
 | 	vpslld	xmm0,xmm0,1 | 
 | 	vpsllq	xmm3,xmm1,32 | 
 | 	vpxor	xmm1,xmm1,xmm3 | 
 | 	vpshufb	xmm3,xmm1,XMMWORD[con3] | 
 | 	vpxor	xmm1,xmm1,xmm3 | 
 | 	vpxor	xmm1,xmm1,xmm2 | 
 |  | 
 | 	vaesenc	xmm4,xmm4,xmm1 | 
 | 	vmovdqa	XMMWORD[80+rdx],xmm1 | 
 |  | 
 | 	vpshufb	xmm2,xmm1,xmm15 | 
 | 	vaesenclast	xmm2,xmm2,xmm0 | 
 | 	vpslld	xmm0,xmm0,1 | 
 | 	vpsllq	xmm3,xmm1,32 | 
 | 	vpxor	xmm1,xmm1,xmm3 | 
 | 	vpshufb	xmm3,xmm1,XMMWORD[con3] | 
 | 	vpxor	xmm1,xmm1,xmm3 | 
 | 	vpxor	xmm1,xmm1,xmm2 | 
 |  | 
 | 	vaesenc	xmm4,xmm4,xmm1 | 
 | 	vmovdqa	XMMWORD[96+rdx],xmm1 | 
 |  | 
 | 	vpshufb	xmm2,xmm1,xmm15 | 
 | 	vaesenclast	xmm2,xmm2,xmm0 | 
 | 	vpslld	xmm0,xmm0,1 | 
 | 	vpsllq	xmm3,xmm1,32 | 
 | 	vpxor	xmm1,xmm1,xmm3 | 
 | 	vpshufb	xmm3,xmm1,XMMWORD[con3] | 
 | 	vpxor	xmm1,xmm1,xmm3 | 
 | 	vpxor	xmm1,xmm1,xmm2 | 
 |  | 
 | 	vaesenc	xmm4,xmm4,xmm1 | 
 | 	vmovdqa	XMMWORD[112+rdx],xmm1 | 
 |  | 
 | 	vpshufb	xmm2,xmm1,xmm15 | 
 | 	vaesenclast	xmm2,xmm2,xmm0 | 
 | 	vpslld	xmm0,xmm0,1 | 
 | 	vpsllq	xmm3,xmm1,32 | 
 | 	vpxor	xmm1,xmm1,xmm3 | 
 | 	vpshufb	xmm3,xmm1,XMMWORD[con3] | 
 | 	vpxor	xmm1,xmm1,xmm3 | 
 | 	vpxor	xmm1,xmm1,xmm2 | 
 |  | 
 | 	vaesenc	xmm4,xmm4,xmm1 | 
 | 	vmovdqa	XMMWORD[128+rdx],xmm1 | 
 |  | 
 |  | 
 | 	vmovdqa	xmm0,XMMWORD[con2] | 
 |  | 
 | 	vpshufb	xmm2,xmm1,xmm15 | 
 | 	vaesenclast	xmm2,xmm2,xmm0 | 
 | 	vpslld	xmm0,xmm0,1 | 
 | 	vpsllq	xmm3,xmm1,32 | 
 | 	vpxor	xmm1,xmm1,xmm3 | 
 | 	vpshufb	xmm3,xmm1,XMMWORD[con3] | 
 | 	vpxor	xmm1,xmm1,xmm3 | 
 | 	vpxor	xmm1,xmm1,xmm2 | 
 |  | 
 | 	vaesenc	xmm4,xmm4,xmm1 | 
 | 	vmovdqa	XMMWORD[144+rdx],xmm1 | 
 |  | 
 | 	vpshufb	xmm2,xmm1,xmm15 | 
 | 	vaesenclast	xmm2,xmm2,xmm0 | 
 | 	vpsllq	xmm3,xmm1,32 | 
 | 	vpxor	xmm1,xmm1,xmm3 | 
 | 	vpshufb	xmm3,xmm1,XMMWORD[con3] | 
 | 	vpxor	xmm1,xmm1,xmm3 | 
 | 	vpxor	xmm1,xmm1,xmm2 | 
 |  | 
 | 	vaesenclast	xmm4,xmm4,xmm1 | 
 | 	vmovdqa	XMMWORD[160+rdx],xmm1 | 
 |  | 
 |  | 
 | 	vmovdqa	XMMWORD[rsi],xmm4 | 
 | 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue | 
 | 	mov	rsi,QWORD[16+rsp] | 
 | 	ret | 
 |  | 
 | $L$SEH_end_aes128gcmsiv_aes_ks_enc_x1: | 
 | global	aes128gcmsiv_kdf | 
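; aes128gcmsiv_kdf: GCM-SIV key derivation.  Build four counter blocks from
; the nonce block at [arg1 = rdi] (the 32-bit counter lane is cleared with
; and_mask, then set to 0,1,2,3), encrypt them with the AES-128 round keys
; at [arg3 = rdx], and store the four 16-byte outputs at [arg2 = rsi].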
 |  | 
 | ALIGN	16 | 
 | aes128gcmsiv_kdf: | 
 | 	mov	QWORD[8+rsp],rdi	;WIN64 prologue | 
 | 	mov	QWORD[16+rsp],rsi | 
 | 	mov	rax,rsp | 
 | $L$SEH_begin_aes128gcmsiv_kdf: | 
 | 	mov	rdi,rcx | 
 | 	mov	rsi,rdx | 
 | 	mov	rdx,r8 | 
 |  | 
 |  | 
 |  | 
 | _CET_ENDBR | 
 |  | 
 |  | 
 |  | 
 |  | 
 | 	vmovdqa	xmm1,XMMWORD[rdx] | 
 | 	vmovdqa	xmm9,XMMWORD[rdi] | 
 | 	vmovdqa	xmm12,XMMWORD[and_mask] | 
 | 	vmovdqa	xmm13,XMMWORD[one] | 
 | 	vpshufd	xmm9,xmm9,0x90 | 
 | 	vpand	xmm9,xmm9,xmm12 | 
 | 	vpaddd	xmm10,xmm9,xmm13 | 
 | 	vpaddd	xmm11,xmm10,xmm13 | 
 | 	vpaddd	xmm12,xmm11,xmm13 | 
 |  | 
 | 	vpxor	xmm9,xmm9,xmm1 | 
 | 	vpxor	xmm10,xmm10,xmm1 | 
 | 	vpxor	xmm11,xmm11,xmm1 | 
 | 	vpxor	xmm12,xmm12,xmm1 | 
 |  | 
 | 	vmovdqa	xmm1,XMMWORD[16+rdx] | 
 | 	vaesenc	xmm9,xmm9,xmm1 | 
 | 	vaesenc	xmm10,xmm10,xmm1 | 
 | 	vaesenc	xmm11,xmm11,xmm1 | 
 | 	vaesenc	xmm12,xmm12,xmm1 | 
 |  | 
 | 	vmovdqa	xmm2,XMMWORD[32+rdx] | 
 | 	vaesenc	xmm9,xmm9,xmm2 | 
 | 	vaesenc	xmm10,xmm10,xmm2 | 
 | 	vaesenc	xmm11,xmm11,xmm2 | 
 | 	vaesenc	xmm12,xmm12,xmm2 | 
 |  | 
 | 	vmovdqa	xmm1,XMMWORD[48+rdx] | 
 | 	vaesenc	xmm9,xmm9,xmm1 | 
 | 	vaesenc	xmm10,xmm10,xmm1 | 
 | 	vaesenc	xmm11,xmm11,xmm1 | 
 | 	vaesenc	xmm12,xmm12,xmm1 | 
 |  | 
 | 	vmovdqa	xmm2,XMMWORD[64+rdx] | 
 | 	vaesenc	xmm9,xmm9,xmm2 | 
 | 	vaesenc	xmm10,xmm10,xmm2 | 
 | 	vaesenc	xmm11,xmm11,xmm2 | 
 | 	vaesenc	xmm12,xmm12,xmm2 | 
 |  | 
 | 	vmovdqa	xmm1,XMMWORD[80+rdx] | 
 | 	vaesenc	xmm9,xmm9,xmm1 | 
 | 	vaesenc	xmm10,xmm10,xmm1 | 
 | 	vaesenc	xmm11,xmm11,xmm1 | 
 | 	vaesenc	xmm12,xmm12,xmm1 | 
 |  | 
 | 	vmovdqa	xmm2,XMMWORD[96+rdx] | 
 | 	vaesenc	xmm9,xmm9,xmm2 | 
 | 	vaesenc	xmm10,xmm10,xmm2 | 
 | 	vaesenc	xmm11,xmm11,xmm2 | 
 | 	vaesenc	xmm12,xmm12,xmm2 | 
 |  | 
 | 	vmovdqa	xmm1,XMMWORD[112+rdx] | 
 | 	vaesenc	xmm9,xmm9,xmm1 | 
 | 	vaesenc	xmm10,xmm10,xmm1 | 
 | 	vaesenc	xmm11,xmm11,xmm1 | 
 | 	vaesenc	xmm12,xmm12,xmm1 | 
 |  | 
 | 	vmovdqa	xmm2,XMMWORD[128+rdx] | 
 | 	vaesenc	xmm9,xmm9,xmm2 | 
 | 	vaesenc	xmm10,xmm10,xmm2 | 
 | 	vaesenc	xmm11,xmm11,xmm2 | 
 | 	vaesenc	xmm12,xmm12,xmm2 | 
 |  | 
 | 	vmovdqa	xmm1,XMMWORD[144+rdx] | 
 | 	vaesenc	xmm9,xmm9,xmm1 | 
 | 	vaesenc	xmm10,xmm10,xmm1 | 
 | 	vaesenc	xmm11,xmm11,xmm1 | 
 | 	vaesenc	xmm12,xmm12,xmm1 | 
 |  | 
 | 	vmovdqa	xmm2,XMMWORD[160+rdx] | 
 | 	vaesenclast	xmm9,xmm9,xmm2 | 
 | 	vaesenclast	xmm10,xmm10,xmm2 | 
 | 	vaesenclast	xmm11,xmm11,xmm2 | 
 | 	vaesenclast	xmm12,xmm12,xmm2 | 
 |  | 
 |  | 
 | 	vmovdqa	XMMWORD[rsi],xmm9 | 
 | 	vmovdqa	XMMWORD[16+rsi],xmm10 | 
 | 	vmovdqa	XMMWORD[32+rsi],xmm11 | 
 | 	vmovdqa	XMMWORD[48+rsi],xmm12 | 
 | 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue | 
 | 	mov	rsi,QWORD[16+rsp] | 
 | 	ret | 
 |  | 
 | $L$SEH_end_aes128gcmsiv_kdf: | 
 | global	aes128gcmsiv_enc_msg_x4 | 
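; aes128gcmsiv_enc_msg_x4: CTR-mode encryption, four blocks at a time.  The
; counter block is the tag at [arg3 = rdx] with its top bit set (OR_MASK);
; arg5 = r8 bytes (taken as whole 16-byte blocks) are read from [arg1 = rdi],
; encrypted with the AES-128 round keys at [arg4 = rcx], and written to
; [arg2 = rsi].  Blocks left over after the 4-wide loop are done one at a time.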
 |  | 
 | ALIGN	16 | 
 | aes128gcmsiv_enc_msg_x4: | 
 | 	mov	QWORD[8+rsp],rdi	;WIN64 prologue | 
 | 	mov	QWORD[16+rsp],rsi | 
 | 	mov	rax,rsp | 
 | $L$SEH_begin_aes128gcmsiv_enc_msg_x4: | 
 | 	mov	rdi,rcx | 
 | 	mov	rsi,rdx | 
 | 	mov	rdx,r8 | 
 | 	mov	rcx,r9 | 
 | 	mov	r8,QWORD[40+rsp] | 
 |  | 
 |  | 
 |  | 
 | _CET_ENDBR | 
 | 	test	r8,r8 | 
 | 	jnz	NEAR $L$128_enc_msg_x4_start | 
 | 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue | 
 | 	mov	rsi,QWORD[16+rsp] | 
 | 	ret | 
 |  | 
 | $L$128_enc_msg_x4_start: | 
 | 	push	r12 | 
 |  | 
 | 	push	r13 | 
 |  | 
 |  | 
 | 	shr	r8,4 | 
 | 	mov	r10,r8 | 
 | 	shl	r10,62 | 
 | 	shr	r10,62 | 
 |  | 
 |  | 
 | 	vmovdqa	xmm15,XMMWORD[rdx] | 
 | 	vpor	xmm15,xmm15,XMMWORD[OR_MASK] | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[four] | 
 | 	vmovdqa	xmm0,xmm15 | 
 | 	vpaddd	xmm1,xmm15,XMMWORD[one] | 
 | 	vpaddd	xmm2,xmm15,XMMWORD[two] | 
 | 	vpaddd	xmm3,xmm15,XMMWORD[three] | 
 |  | 
 | 	shr	r8,2 | 
 | 	je	NEAR $L$128_enc_msg_x4_check_remainder | 
 |  | 
 | 	sub	rsi,64 | 
 | 	sub	rdi,64 | 
 |  | 
 | $L$128_enc_msg_x4_loop1: | 
 | 	add	rsi,64 | 
 | 	add	rdi,64 | 
 |  | 
 | 	vmovdqa	xmm5,xmm0 | 
 | 	vmovdqa	xmm6,xmm1 | 
 | 	vmovdqa	xmm7,xmm2 | 
 | 	vmovdqa	xmm8,xmm3 | 
 |  | 
 | 	vpxor	xmm5,xmm5,XMMWORD[rcx] | 
 | 	vpxor	xmm6,xmm6,XMMWORD[rcx] | 
 | 	vpxor	xmm7,xmm7,XMMWORD[rcx] | 
 | 	vpxor	xmm8,xmm8,XMMWORD[rcx] | 
 |  | 
 | 	vmovdqu	xmm12,XMMWORD[16+rcx] | 
 | 	vaesenc	xmm5,xmm5,xmm12 | 
 | 	vaesenc	xmm6,xmm6,xmm12 | 
 | 	vaesenc	xmm7,xmm7,xmm12 | 
 | 	vaesenc	xmm8,xmm8,xmm12 | 
 |  | 
 | 	vpaddd	xmm0,xmm0,xmm4 | 
 | 	vmovdqu	xmm12,XMMWORD[32+rcx] | 
 | 	vaesenc	xmm5,xmm5,xmm12 | 
 | 	vaesenc	xmm6,xmm6,xmm12 | 
 | 	vaesenc	xmm7,xmm7,xmm12 | 
 | 	vaesenc	xmm8,xmm8,xmm12 | 
 |  | 
 | 	vpaddd	xmm1,xmm1,xmm4 | 
 | 	vmovdqu	xmm12,XMMWORD[48+rcx] | 
 | 	vaesenc	xmm5,xmm5,xmm12 | 
 | 	vaesenc	xmm6,xmm6,xmm12 | 
 | 	vaesenc	xmm7,xmm7,xmm12 | 
 | 	vaesenc	xmm8,xmm8,xmm12 | 
 |  | 
 | 	vpaddd	xmm2,xmm2,xmm4 | 
 | 	vmovdqu	xmm12,XMMWORD[64+rcx] | 
 | 	vaesenc	xmm5,xmm5,xmm12 | 
 | 	vaesenc	xmm6,xmm6,xmm12 | 
 | 	vaesenc	xmm7,xmm7,xmm12 | 
 | 	vaesenc	xmm8,xmm8,xmm12 | 
 |  | 
 | 	vpaddd	xmm3,xmm3,xmm4 | 
 |  | 
 | 	vmovdqu	xmm12,XMMWORD[80+rcx] | 
 | 	vaesenc	xmm5,xmm5,xmm12 | 
 | 	vaesenc	xmm6,xmm6,xmm12 | 
 | 	vaesenc	xmm7,xmm7,xmm12 | 
 | 	vaesenc	xmm8,xmm8,xmm12 | 
 |  | 
 | 	vmovdqu	xmm12,XMMWORD[96+rcx] | 
 | 	vaesenc	xmm5,xmm5,xmm12 | 
 | 	vaesenc	xmm6,xmm6,xmm12 | 
 | 	vaesenc	xmm7,xmm7,xmm12 | 
 | 	vaesenc	xmm8,xmm8,xmm12 | 
 |  | 
 | 	vmovdqu	xmm12,XMMWORD[112+rcx] | 
 | 	vaesenc	xmm5,xmm5,xmm12 | 
 | 	vaesenc	xmm6,xmm6,xmm12 | 
 | 	vaesenc	xmm7,xmm7,xmm12 | 
 | 	vaesenc	xmm8,xmm8,xmm12 | 
 |  | 
 | 	vmovdqu	xmm12,XMMWORD[128+rcx] | 
 | 	vaesenc	xmm5,xmm5,xmm12 | 
 | 	vaesenc	xmm6,xmm6,xmm12 | 
 | 	vaesenc	xmm7,xmm7,xmm12 | 
 | 	vaesenc	xmm8,xmm8,xmm12 | 
 |  | 
 | 	vmovdqu	xmm12,XMMWORD[144+rcx] | 
 | 	vaesenc	xmm5,xmm5,xmm12 | 
 | 	vaesenc	xmm6,xmm6,xmm12 | 
 | 	vaesenc	xmm7,xmm7,xmm12 | 
 | 	vaesenc	xmm8,xmm8,xmm12 | 
 |  | 
 | 	vmovdqu	xmm12,XMMWORD[160+rcx] | 
 | 	vaesenclast	xmm5,xmm5,xmm12 | 
 | 	vaesenclast	xmm6,xmm6,xmm12 | 
 | 	vaesenclast	xmm7,xmm7,xmm12 | 
 | 	vaesenclast	xmm8,xmm8,xmm12 | 
 |  | 
 |  | 
 |  | 
 | 	vpxor	xmm5,xmm5,XMMWORD[rdi] | 
 | 	vpxor	xmm6,xmm6,XMMWORD[16+rdi] | 
 | 	vpxor	xmm7,xmm7,XMMWORD[32+rdi] | 
 | 	vpxor	xmm8,xmm8,XMMWORD[48+rdi] | 
 |  | 
 | 	sub	r8,1 | 
 |  | 
 | 	vmovdqu	XMMWORD[rsi],xmm5 | 
 | 	vmovdqu	XMMWORD[16+rsi],xmm6 | 
 | 	vmovdqu	XMMWORD[32+rsi],xmm7 | 
 | 	vmovdqu	XMMWORD[48+rsi],xmm8 | 
 |  | 
 | 	jne	NEAR $L$128_enc_msg_x4_loop1 | 
 |  | 
 | 	add	rsi,64 | 
 | 	add	rdi,64 | 
 |  | 
 | $L$128_enc_msg_x4_check_remainder: | 
 | 	cmp	r10,0 | 
 | 	je	NEAR $L$128_enc_msg_x4_out | 
 |  | 
 | $L$128_enc_msg_x4_loop2: | 
 |  | 
 |  | 
 | 	vmovdqa	xmm5,xmm0 | 
 | 	vpaddd	xmm0,xmm0,XMMWORD[one] | 
 |  | 
 | 	vpxor	xmm5,xmm5,XMMWORD[rcx] | 
 | 	vaesenc	xmm5,xmm5,XMMWORD[16+rcx] | 
 | 	vaesenc	xmm5,xmm5,XMMWORD[32+rcx] | 
 | 	vaesenc	xmm5,xmm5,XMMWORD[48+rcx] | 
 | 	vaesenc	xmm5,xmm5,XMMWORD[64+rcx] | 
 | 	vaesenc	xmm5,xmm5,XMMWORD[80+rcx] | 
 | 	vaesenc	xmm5,xmm5,XMMWORD[96+rcx] | 
 | 	vaesenc	xmm5,xmm5,XMMWORD[112+rcx] | 
 | 	vaesenc	xmm5,xmm5,XMMWORD[128+rcx] | 
 | 	vaesenc	xmm5,xmm5,XMMWORD[144+rcx] | 
 | 	vaesenclast	xmm5,xmm5,XMMWORD[160+rcx] | 
 |  | 
 |  | 
 | 	vpxor	xmm5,xmm5,XMMWORD[rdi] | 
 | 	vmovdqu	XMMWORD[rsi],xmm5 | 
 |  | 
 | 	add	rdi,16 | 
 | 	add	rsi,16 | 
 |  | 
 | 	sub	r10,1 | 
 | 	jne	NEAR $L$128_enc_msg_x4_loop2 | 
 |  | 
 | $L$128_enc_msg_x4_out: | 
 | 	pop	r13 | 
 |  | 
 | 	pop	r12 | 
 |  | 
 | 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue | 
 | 	mov	rsi,QWORD[16+rsp] | 
 | 	ret | 
 |  | 
 | $L$SEH_end_aes128gcmsiv_enc_msg_x4: | 
 | global	aes128gcmsiv_enc_msg_x8 | 
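; aes128gcmsiv_enc_msg_x8: as aes128gcmsiv_enc_msg_x4, but eight blocks per
; iteration; the eighth counter is kept in a 64-byte-aligned stack slot.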
 |  | 
 | ALIGN	16 | 
 | aes128gcmsiv_enc_msg_x8: | 
 | 	mov	QWORD[8+rsp],rdi	;WIN64 prologue | 
 | 	mov	QWORD[16+rsp],rsi | 
 | 	mov	rax,rsp | 
 | $L$SEH_begin_aes128gcmsiv_enc_msg_x8: | 
 | 	mov	rdi,rcx | 
 | 	mov	rsi,rdx | 
 | 	mov	rdx,r8 | 
 | 	mov	rcx,r9 | 
 | 	mov	r8,QWORD[40+rsp] | 
 |  | 
 |  | 
 |  | 
 | _CET_ENDBR | 
 | 	test	r8,r8 | 
 | 	jnz	NEAR $L$128_enc_msg_x8_start | 
 | 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue | 
 | 	mov	rsi,QWORD[16+rsp] | 
 | 	ret | 
 |  | 
 | $L$128_enc_msg_x8_start: | 
 | 	push	r12 | 
 |  | 
 | 	push	r13 | 
 |  | 
 | 	push	rbp | 
 |  | 
 | 	mov	rbp,rsp | 
 |  | 
 |  | 
 |  | 
 | 	sub	rsp,128 | 
 | 	and	rsp,-64 | 
 |  | 
 | 	shr	r8,4 | 
 | 	mov	r10,r8 | 
 | 	shl	r10,61 | 
 | 	shr	r10,61 | 
 |  | 
 |  | 
 | 	vmovdqu	xmm1,XMMWORD[rdx] | 
 | 	vpor	xmm1,xmm1,XMMWORD[OR_MASK] | 
 |  | 
 |  | 
 | 	vpaddd	xmm0,xmm1,XMMWORD[seven] | 
 | 	vmovdqu	XMMWORD[rsp],xmm0 | 
 | 	vpaddd	xmm9,xmm1,XMMWORD[one] | 
 | 	vpaddd	xmm10,xmm1,XMMWORD[two] | 
 | 	vpaddd	xmm11,xmm1,XMMWORD[three] | 
 | 	vpaddd	xmm12,xmm1,XMMWORD[four] | 
 | 	vpaddd	xmm13,xmm1,XMMWORD[five] | 
 | 	vpaddd	xmm14,xmm1,XMMWORD[six] | 
 | 	vmovdqa	xmm0,xmm1 | 
 |  | 
 | 	shr	r8,3 | 
 | 	je	NEAR $L$128_enc_msg_x8_check_remainder | 
 |  | 
 | 	sub	rsi,128 | 
 | 	sub	rdi,128 | 
 |  | 
 | $L$128_enc_msg_x8_loop1: | 
 | 	add	rsi,128 | 
 | 	add	rdi,128 | 
 |  | 
 | 	vmovdqa	xmm1,xmm0 | 
 | 	vmovdqa	xmm2,xmm9 | 
 | 	vmovdqa	xmm3,xmm10 | 
 | 	vmovdqa	xmm4,xmm11 | 
 | 	vmovdqa	xmm5,xmm12 | 
 | 	vmovdqa	xmm6,xmm13 | 
 | 	vmovdqa	xmm7,xmm14 | 
 |  | 
 | 	vmovdqu	xmm8,XMMWORD[rsp] | 
 |  | 
 | 	vpxor	xmm1,xmm1,XMMWORD[rcx] | 
 | 	vpxor	xmm2,xmm2,XMMWORD[rcx] | 
 | 	vpxor	xmm3,xmm3,XMMWORD[rcx] | 
 | 	vpxor	xmm4,xmm4,XMMWORD[rcx] | 
 | 	vpxor	xmm5,xmm5,XMMWORD[rcx] | 
 | 	vpxor	xmm6,xmm6,XMMWORD[rcx] | 
 | 	vpxor	xmm7,xmm7,XMMWORD[rcx] | 
 | 	vpxor	xmm8,xmm8,XMMWORD[rcx] | 
 |  | 
 | 	vmovdqu	xmm15,XMMWORD[16+rcx] | 
 | 	vaesenc	xmm1,xmm1,xmm15 | 
 | 	vaesenc	xmm2,xmm2,xmm15 | 
 | 	vaesenc	xmm3,xmm3,xmm15 | 
 | 	vaesenc	xmm4,xmm4,xmm15 | 
 | 	vaesenc	xmm5,xmm5,xmm15 | 
 | 	vaesenc	xmm6,xmm6,xmm15 | 
 | 	vaesenc	xmm7,xmm7,xmm15 | 
 | 	vaesenc	xmm8,xmm8,xmm15 | 
 |  | 
 | 	vmovdqu	xmm14,XMMWORD[rsp] | 
 | 	vpaddd	xmm14,xmm14,XMMWORD[eight] | 
 | 	vmovdqu	XMMWORD[rsp],xmm14 | 
 | 	vmovdqu	xmm15,XMMWORD[32+rcx] | 
 | 	vaesenc	xmm1,xmm1,xmm15 | 
 | 	vaesenc	xmm2,xmm2,xmm15 | 
 | 	vaesenc	xmm3,xmm3,xmm15 | 
 | 	vaesenc	xmm4,xmm4,xmm15 | 
 | 	vaesenc	xmm5,xmm5,xmm15 | 
 | 	vaesenc	xmm6,xmm6,xmm15 | 
 | 	vaesenc	xmm7,xmm7,xmm15 | 
 | 	vaesenc	xmm8,xmm8,xmm15 | 
 |  | 
 | 	vpsubd	xmm14,xmm14,XMMWORD[one] | 
 | 	vmovdqu	xmm15,XMMWORD[48+rcx] | 
 | 	vaesenc	xmm1,xmm1,xmm15 | 
 | 	vaesenc	xmm2,xmm2,xmm15 | 
 | 	vaesenc	xmm3,xmm3,xmm15 | 
 | 	vaesenc	xmm4,xmm4,xmm15 | 
 | 	vaesenc	xmm5,xmm5,xmm15 | 
 | 	vaesenc	xmm6,xmm6,xmm15 | 
 | 	vaesenc	xmm7,xmm7,xmm15 | 
 | 	vaesenc	xmm8,xmm8,xmm15 | 
 |  | 
 | 	vpaddd	xmm0,xmm0,XMMWORD[eight] | 
 | 	vmovdqu	xmm15,XMMWORD[64+rcx] | 
 | 	vaesenc	xmm1,xmm1,xmm15 | 
 | 	vaesenc	xmm2,xmm2,xmm15 | 
 | 	vaesenc	xmm3,xmm3,xmm15 | 
 | 	vaesenc	xmm4,xmm4,xmm15 | 
 | 	vaesenc	xmm5,xmm5,xmm15 | 
 | 	vaesenc	xmm6,xmm6,xmm15 | 
 | 	vaesenc	xmm7,xmm7,xmm15 | 
 | 	vaesenc	xmm8,xmm8,xmm15 | 
 |  | 
 | 	vpaddd	xmm9,xmm9,XMMWORD[eight] | 
 | 	vmovdqu	xmm15,XMMWORD[80+rcx] | 
 | 	vaesenc	xmm1,xmm1,xmm15 | 
 | 	vaesenc	xmm2,xmm2,xmm15 | 
 | 	vaesenc	xmm3,xmm3,xmm15 | 
 | 	vaesenc	xmm4,xmm4,xmm15 | 
 | 	vaesenc	xmm5,xmm5,xmm15 | 
 | 	vaesenc	xmm6,xmm6,xmm15 | 
 | 	vaesenc	xmm7,xmm7,xmm15 | 
 | 	vaesenc	xmm8,xmm8,xmm15 | 
 |  | 
 | 	vpaddd	xmm10,xmm10,XMMWORD[eight] | 
 | 	vmovdqu	xmm15,XMMWORD[96+rcx] | 
 | 	vaesenc	xmm1,xmm1,xmm15 | 
 | 	vaesenc	xmm2,xmm2,xmm15 | 
 | 	vaesenc	xmm3,xmm3,xmm15 | 
 | 	vaesenc	xmm4,xmm4,xmm15 | 
 | 	vaesenc	xmm5,xmm5,xmm15 | 
 | 	vaesenc	xmm6,xmm6,xmm15 | 
 | 	vaesenc	xmm7,xmm7,xmm15 | 
 | 	vaesenc	xmm8,xmm8,xmm15 | 
 |  | 
 | 	vpaddd	xmm11,xmm11,XMMWORD[eight] | 
 | 	vmovdqu	xmm15,XMMWORD[112+rcx] | 
 | 	vaesenc	xmm1,xmm1,xmm15 | 
 | 	vaesenc	xmm2,xmm2,xmm15 | 
 | 	vaesenc	xmm3,xmm3,xmm15 | 
 | 	vaesenc	xmm4,xmm4,xmm15 | 
 | 	vaesenc	xmm5,xmm5,xmm15 | 
 | 	vaesenc	xmm6,xmm6,xmm15 | 
 | 	vaesenc	xmm7,xmm7,xmm15 | 
 | 	vaesenc	xmm8,xmm8,xmm15 | 
 |  | 
 | 	vpaddd	xmm12,xmm12,XMMWORD[eight] | 
 | 	vmovdqu	xmm15,XMMWORD[128+rcx] | 
 | 	vaesenc	xmm1,xmm1,xmm15 | 
 | 	vaesenc	xmm2,xmm2,xmm15 | 
 | 	vaesenc	xmm3,xmm3,xmm15 | 
 | 	vaesenc	xmm4,xmm4,xmm15 | 
 | 	vaesenc	xmm5,xmm5,xmm15 | 
 | 	vaesenc	xmm6,xmm6,xmm15 | 
 | 	vaesenc	xmm7,xmm7,xmm15 | 
 | 	vaesenc	xmm8,xmm8,xmm15 | 
 |  | 
 | 	vpaddd	xmm13,xmm13,XMMWORD[eight] | 
 | 	vmovdqu	xmm15,XMMWORD[144+rcx] | 
 | 	vaesenc	xmm1,xmm1,xmm15 | 
 | 	vaesenc	xmm2,xmm2,xmm15 | 
 | 	vaesenc	xmm3,xmm3,xmm15 | 
 | 	vaesenc	xmm4,xmm4,xmm15 | 
 | 	vaesenc	xmm5,xmm5,xmm15 | 
 | 	vaesenc	xmm6,xmm6,xmm15 | 
 | 	vaesenc	xmm7,xmm7,xmm15 | 
 | 	vaesenc	xmm8,xmm8,xmm15 | 
 |  | 
 | 	vmovdqu	xmm15,XMMWORD[160+rcx] | 
 | 	vaesenclast	xmm1,xmm1,xmm15 | 
 | 	vaesenclast	xmm2,xmm2,xmm15 | 
 | 	vaesenclast	xmm3,xmm3,xmm15 | 
 | 	vaesenclast	xmm4,xmm4,xmm15 | 
 | 	vaesenclast	xmm5,xmm5,xmm15 | 
 | 	vaesenclast	xmm6,xmm6,xmm15 | 
 | 	vaesenclast	xmm7,xmm7,xmm15 | 
 | 	vaesenclast	xmm8,xmm8,xmm15 | 
 |  | 
 |  | 
 |  | 
 | 	vpxor	xmm1,xmm1,XMMWORD[rdi] | 
 | 	vpxor	xmm2,xmm2,XMMWORD[16+rdi] | 
 | 	vpxor	xmm3,xmm3,XMMWORD[32+rdi] | 
 | 	vpxor	xmm4,xmm4,XMMWORD[48+rdi] | 
 | 	vpxor	xmm5,xmm5,XMMWORD[64+rdi] | 
 | 	vpxor	xmm6,xmm6,XMMWORD[80+rdi] | 
 | 	vpxor	xmm7,xmm7,XMMWORD[96+rdi] | 
 | 	vpxor	xmm8,xmm8,XMMWORD[112+rdi] | 
 |  | 
 | 	dec	r8 | 
 |  | 
 | 	vmovdqu	XMMWORD[rsi],xmm1 | 
 | 	vmovdqu	XMMWORD[16+rsi],xmm2 | 
 | 	vmovdqu	XMMWORD[32+rsi],xmm3 | 
 | 	vmovdqu	XMMWORD[48+rsi],xmm4 | 
 | 	vmovdqu	XMMWORD[64+rsi],xmm5 | 
 | 	vmovdqu	XMMWORD[80+rsi],xmm6 | 
 | 	vmovdqu	XMMWORD[96+rsi],xmm7 | 
 | 	vmovdqu	XMMWORD[112+rsi],xmm8 | 
 |  | 
 | 	jne	NEAR $L$128_enc_msg_x8_loop1 | 
 |  | 
 | 	add	rsi,128 | 
 | 	add	rdi,128 | 
 |  | 
 | $L$128_enc_msg_x8_check_remainder: | 
 | 	cmp	r10,0 | 
 | 	je	NEAR $L$128_enc_msg_x8_out | 
 |  | 
 | $L$128_enc_msg_x8_loop2: | 
 |  | 
 |  | 
 | 	vmovdqa	xmm1,xmm0 | 
 | 	vpaddd	xmm0,xmm0,XMMWORD[one] | 
 |  | 
 | 	vpxor	xmm1,xmm1,XMMWORD[rcx] | 
 | 	vaesenc	xmm1,xmm1,XMMWORD[16+rcx] | 
 | 	vaesenc	xmm1,xmm1,XMMWORD[32+rcx] | 
 | 	vaesenc	xmm1,xmm1,XMMWORD[48+rcx] | 
 | 	vaesenc	xmm1,xmm1,XMMWORD[64+rcx] | 
 | 	vaesenc	xmm1,xmm1,XMMWORD[80+rcx] | 
 | 	vaesenc	xmm1,xmm1,XMMWORD[96+rcx] | 
 | 	vaesenc	xmm1,xmm1,XMMWORD[112+rcx] | 
 | 	vaesenc	xmm1,xmm1,XMMWORD[128+rcx] | 
 | 	vaesenc	xmm1,xmm1,XMMWORD[144+rcx] | 
 | 	vaesenclast	xmm1,xmm1,XMMWORD[160+rcx] | 
 |  | 
 |  | 
 | 	vpxor	xmm1,xmm1,XMMWORD[rdi] | 
 |  | 
 | 	vmovdqu	XMMWORD[rsi],xmm1 | 
 |  | 
 | 	add	rdi,16 | 
 | 	add	rsi,16 | 
 |  | 
 | 	dec	r10 | 
 | 	jne	NEAR $L$128_enc_msg_x8_loop2 | 
 |  | 
 | $L$128_enc_msg_x8_out: | 
 | 	mov	rsp,rbp | 
 |  | 
 | 	pop	rbp | 
 |  | 
 | 	pop	r13 | 
 |  | 
 | 	pop	r12 | 
 |  | 
 | 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue | 
 | 	mov	rsi,QWORD[16+rsp] | 
 | 	ret | 
 |  | 
 | $L$SEH_end_aes128gcmsiv_enc_msg_x8: | 
 | global	aes128gcmsiv_dec | 
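; aes128gcmsiv_dec: CTR-mode decryption fused with the POLYVAL update over
; the decrypted plaintext.  arg6 = r9 bytes (rounded down to whole blocks) at
; [arg1 = rdi] are decrypted with the AES-128 round keys at [arg5 = r8] and
; written to [arg2 = rsi]; the counter comes from [arg3+16] with its top bit
; set.  The running POLYVAL value at [arg3 = rdx] is updated with the powers
; of H at [arg4 = rcx], six blocks at a time with deferred reduction; the rest
; of the buffer at arg3 provides scratch space for the deferred inputs.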
 |  | 
 | ALIGN	16 | 
 | aes128gcmsiv_dec: | 
 | 	mov	QWORD[8+rsp],rdi	;WIN64 prologue | 
 | 	mov	QWORD[16+rsp],rsi | 
 | 	mov	rax,rsp | 
 | $L$SEH_begin_aes128gcmsiv_dec: | 
 | 	mov	rdi,rcx | 
 | 	mov	rsi,rdx | 
 | 	mov	rdx,r8 | 
 | 	mov	rcx,r9 | 
 | 	mov	r8,QWORD[40+rsp] | 
 | 	mov	r9,QWORD[48+rsp] | 
 |  | 
 |  | 
 |  | 
 | _CET_ENDBR | 
 | 	test	r9,~15 | 
 | 	jnz	NEAR $L$128_dec_start | 
 | 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue | 
 | 	mov	rsi,QWORD[16+rsp] | 
 | 	ret | 
 |  | 
 | $L$128_dec_start: | 
 | 	vzeroupper | 
 | 	vmovdqa	xmm0,XMMWORD[rdx] | 
 |  | 
 |  | 
 | 	vmovdqu	xmm15,XMMWORD[16+rdx] | 
 | 	vpor	xmm15,xmm15,XMMWORD[OR_MASK] | 
 | 	mov	rax,rdx | 
 |  | 
 | 	lea	rax,[32+rax] | 
 | 	lea	rcx,[32+rcx] | 
 |  | 
 | 	and	r9,~15 | 
 |  | 
 |  | 
 | 	cmp	r9,96 | 
 | 	jb	NEAR $L$128_dec_loop2 | 
 |  | 
 |  | 
 | 	sub	r9,96 | 
 | 	vmovdqa	xmm7,xmm15 | 
 | 	vpaddd	xmm8,xmm7,XMMWORD[one] | 
 | 	vpaddd	xmm9,xmm7,XMMWORD[two] | 
 | 	vpaddd	xmm10,xmm9,XMMWORD[one] | 
 | 	vpaddd	xmm11,xmm9,XMMWORD[two] | 
 | 	vpaddd	xmm12,xmm11,XMMWORD[one] | 
 | 	vpaddd	xmm15,xmm11,XMMWORD[two] | 
 |  | 
 | 	vpxor	xmm7,xmm7,XMMWORD[r8] | 
 | 	vpxor	xmm8,xmm8,XMMWORD[r8] | 
 | 	vpxor	xmm9,xmm9,XMMWORD[r8] | 
 | 	vpxor	xmm10,xmm10,XMMWORD[r8] | 
 | 	vpxor	xmm11,xmm11,XMMWORD[r8] | 
 | 	vpxor	xmm12,xmm12,XMMWORD[r8] | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[16+r8] | 
 | 	vaesenc	xmm7,xmm7,xmm4 | 
 | 	vaesenc	xmm8,xmm8,xmm4 | 
 | 	vaesenc	xmm9,xmm9,xmm4 | 
 | 	vaesenc	xmm10,xmm10,xmm4 | 
 | 	vaesenc	xmm11,xmm11,xmm4 | 
 | 	vaesenc	xmm12,xmm12,xmm4 | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[32+r8] | 
 | 	vaesenc	xmm7,xmm7,xmm4 | 
 | 	vaesenc	xmm8,xmm8,xmm4 | 
 | 	vaesenc	xmm9,xmm9,xmm4 | 
 | 	vaesenc	xmm10,xmm10,xmm4 | 
 | 	vaesenc	xmm11,xmm11,xmm4 | 
 | 	vaesenc	xmm12,xmm12,xmm4 | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[48+r8] | 
 | 	vaesenc	xmm7,xmm7,xmm4 | 
 | 	vaesenc	xmm8,xmm8,xmm4 | 
 | 	vaesenc	xmm9,xmm9,xmm4 | 
 | 	vaesenc	xmm10,xmm10,xmm4 | 
 | 	vaesenc	xmm11,xmm11,xmm4 | 
 | 	vaesenc	xmm12,xmm12,xmm4 | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[64+r8] | 
 | 	vaesenc	xmm7,xmm7,xmm4 | 
 | 	vaesenc	xmm8,xmm8,xmm4 | 
 | 	vaesenc	xmm9,xmm9,xmm4 | 
 | 	vaesenc	xmm10,xmm10,xmm4 | 
 | 	vaesenc	xmm11,xmm11,xmm4 | 
 | 	vaesenc	xmm12,xmm12,xmm4 | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[80+r8] | 
 | 	vaesenc	xmm7,xmm7,xmm4 | 
 | 	vaesenc	xmm8,xmm8,xmm4 | 
 | 	vaesenc	xmm9,xmm9,xmm4 | 
 | 	vaesenc	xmm10,xmm10,xmm4 | 
 | 	vaesenc	xmm11,xmm11,xmm4 | 
 | 	vaesenc	xmm12,xmm12,xmm4 | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[96+r8] | 
 | 	vaesenc	xmm7,xmm7,xmm4 | 
 | 	vaesenc	xmm8,xmm8,xmm4 | 
 | 	vaesenc	xmm9,xmm9,xmm4 | 
 | 	vaesenc	xmm10,xmm10,xmm4 | 
 | 	vaesenc	xmm11,xmm11,xmm4 | 
 | 	vaesenc	xmm12,xmm12,xmm4 | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[112+r8] | 
 | 	vaesenc	xmm7,xmm7,xmm4 | 
 | 	vaesenc	xmm8,xmm8,xmm4 | 
 | 	vaesenc	xmm9,xmm9,xmm4 | 
 | 	vaesenc	xmm10,xmm10,xmm4 | 
 | 	vaesenc	xmm11,xmm11,xmm4 | 
 | 	vaesenc	xmm12,xmm12,xmm4 | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[128+r8] | 
 | 	vaesenc	xmm7,xmm7,xmm4 | 
 | 	vaesenc	xmm8,xmm8,xmm4 | 
 | 	vaesenc	xmm9,xmm9,xmm4 | 
 | 	vaesenc	xmm10,xmm10,xmm4 | 
 | 	vaesenc	xmm11,xmm11,xmm4 | 
 | 	vaesenc	xmm12,xmm12,xmm4 | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[144+r8] | 
 | 	vaesenc	xmm7,xmm7,xmm4 | 
 | 	vaesenc	xmm8,xmm8,xmm4 | 
 | 	vaesenc	xmm9,xmm9,xmm4 | 
 | 	vaesenc	xmm10,xmm10,xmm4 | 
 | 	vaesenc	xmm11,xmm11,xmm4 | 
 | 	vaesenc	xmm12,xmm12,xmm4 | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[160+r8] | 
 | 	vaesenclast	xmm7,xmm7,xmm4 | 
 | 	vaesenclast	xmm8,xmm8,xmm4 | 
 | 	vaesenclast	xmm9,xmm9,xmm4 | 
 | 	vaesenclast	xmm10,xmm10,xmm4 | 
 | 	vaesenclast	xmm11,xmm11,xmm4 | 
 | 	vaesenclast	xmm12,xmm12,xmm4 | 
 |  | 
 |  | 
 | 	vpxor	xmm7,xmm7,XMMWORD[rdi] | 
 | 	vpxor	xmm8,xmm8,XMMWORD[16+rdi] | 
 | 	vpxor	xmm9,xmm9,XMMWORD[32+rdi] | 
 | 	vpxor	xmm10,xmm10,XMMWORD[48+rdi] | 
 | 	vpxor	xmm11,xmm11,XMMWORD[64+rdi] | 
 | 	vpxor	xmm12,xmm12,XMMWORD[80+rdi] | 
 |  | 
 | 	vmovdqu	XMMWORD[rsi],xmm7 | 
 | 	vmovdqu	XMMWORD[16+rsi],xmm8 | 
 | 	vmovdqu	XMMWORD[32+rsi],xmm9 | 
 | 	vmovdqu	XMMWORD[48+rsi],xmm10 | 
 | 	vmovdqu	XMMWORD[64+rsi],xmm11 | 
 | 	vmovdqu	XMMWORD[80+rsi],xmm12 | 
 |  | 
 | 	add	rdi,96 | 
 | 	add	rsi,96 | 
 | 	jmp	NEAR $L$128_dec_loop1 | 
 |  | 
 |  | 
 | ALIGN	64 | 
 | $L$128_dec_loop1: | 
 | 	cmp	r9,96 | 
 | 	jb	NEAR $L$128_dec_finish_96 | 
 | 	sub	r9,96 | 
 |  | 
 | 	vmovdqa	xmm6,xmm12 | 
 | 	vmovdqa	XMMWORD[(16-32)+rax],xmm11 | 
 | 	vmovdqa	XMMWORD[(32-32)+rax],xmm10 | 
 | 	vmovdqa	XMMWORD[(48-32)+rax],xmm9 | 
 | 	vmovdqa	XMMWORD[(64-32)+rax],xmm8 | 
 | 	vmovdqa	XMMWORD[(80-32)+rax],xmm7 | 
 |  | 
 | 	vmovdqa	xmm7,xmm15 | 
 | 	vpaddd	xmm8,xmm7,XMMWORD[one] | 
 | 	vpaddd	xmm9,xmm7,XMMWORD[two] | 
 | 	vpaddd	xmm10,xmm9,XMMWORD[one] | 
 | 	vpaddd	xmm11,xmm9,XMMWORD[two] | 
 | 	vpaddd	xmm12,xmm11,XMMWORD[one] | 
 | 	vpaddd	xmm15,xmm11,XMMWORD[two] | 
 |  | 
 | 	vmovdqa	xmm4,XMMWORD[r8] | 
 | 	vpxor	xmm7,xmm7,xmm4 | 
 | 	vpxor	xmm8,xmm8,xmm4 | 
 | 	vpxor	xmm9,xmm9,xmm4 | 
 | 	vpxor	xmm10,xmm10,xmm4 | 
 | 	vpxor	xmm11,xmm11,xmm4 | 
 | 	vpxor	xmm12,xmm12,xmm4 | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[((0-32))+rcx] | 
 | 	vpclmulqdq	xmm2,xmm6,xmm4,0x11 | 
 | 	vpclmulqdq	xmm3,xmm6,xmm4,0x00 | 
 | 	vpclmulqdq	xmm1,xmm6,xmm4,0x01 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm4,0x10 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[16+r8] | 
 | 	vaesenc	xmm7,xmm7,xmm4 | 
 | 	vaesenc	xmm8,xmm8,xmm4 | 
 | 	vaesenc	xmm9,xmm9,xmm4 | 
 | 	vaesenc	xmm10,xmm10,xmm4 | 
 | 	vaesenc	xmm11,xmm11,xmm4 | 
 | 	vaesenc	xmm12,xmm12,xmm4 | 
 |  | 
 | 	vmovdqu	xmm6,XMMWORD[((-16))+rax] | 
 | 	vmovdqu	xmm13,XMMWORD[((-16))+rcx] | 
 |  | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x10 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x11 | 
 | 	vpxor	xmm2,xmm2,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x00 | 
 | 	vpxor	xmm3,xmm3,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x01 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 |  | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[32+r8] | 
 | 	vaesenc	xmm7,xmm7,xmm4 | 
 | 	vaesenc	xmm8,xmm8,xmm4 | 
 | 	vaesenc	xmm9,xmm9,xmm4 | 
 | 	vaesenc	xmm10,xmm10,xmm4 | 
 | 	vaesenc	xmm11,xmm11,xmm4 | 
 | 	vaesenc	xmm12,xmm12,xmm4 | 
 |  | 
 | 	vmovdqu	xmm6,XMMWORD[rax] | 
 | 	vmovdqu	xmm13,XMMWORD[rcx] | 
 |  | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x10 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x11 | 
 | 	vpxor	xmm2,xmm2,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x00 | 
 | 	vpxor	xmm3,xmm3,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x01 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 |  | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[48+r8] | 
 | 	vaesenc	xmm7,xmm7,xmm4 | 
 | 	vaesenc	xmm8,xmm8,xmm4 | 
 | 	vaesenc	xmm9,xmm9,xmm4 | 
 | 	vaesenc	xmm10,xmm10,xmm4 | 
 | 	vaesenc	xmm11,xmm11,xmm4 | 
 | 	vaesenc	xmm12,xmm12,xmm4 | 
 |  | 
 | 	vmovdqu	xmm6,XMMWORD[16+rax] | 
 | 	vmovdqu	xmm13,XMMWORD[16+rcx] | 
 |  | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x10 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x11 | 
 | 	vpxor	xmm2,xmm2,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x00 | 
 | 	vpxor	xmm3,xmm3,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x01 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 |  | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[64+r8] | 
 | 	vaesenc	xmm7,xmm7,xmm4 | 
 | 	vaesenc	xmm8,xmm8,xmm4 | 
 | 	vaesenc	xmm9,xmm9,xmm4 | 
 | 	vaesenc	xmm10,xmm10,xmm4 | 
 | 	vaesenc	xmm11,xmm11,xmm4 | 
 | 	vaesenc	xmm12,xmm12,xmm4 | 
 |  | 
 | 	vmovdqu	xmm6,XMMWORD[32+rax] | 
 | 	vmovdqu	xmm13,XMMWORD[32+rcx] | 
 |  | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x10 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x11 | 
 | 	vpxor	xmm2,xmm2,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x00 | 
 | 	vpxor	xmm3,xmm3,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x01 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 |  | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[80+r8] | 
 | 	vaesenc	xmm7,xmm7,xmm4 | 
 | 	vaesenc	xmm8,xmm8,xmm4 | 
 | 	vaesenc	xmm9,xmm9,xmm4 | 
 | 	vaesenc	xmm10,xmm10,xmm4 | 
 | 	vaesenc	xmm11,xmm11,xmm4 | 
 | 	vaesenc	xmm12,xmm12,xmm4 | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[96+r8] | 
 | 	vaesenc	xmm7,xmm7,xmm4 | 
 | 	vaesenc	xmm8,xmm8,xmm4 | 
 | 	vaesenc	xmm9,xmm9,xmm4 | 
 | 	vaesenc	xmm10,xmm10,xmm4 | 
 | 	vaesenc	xmm11,xmm11,xmm4 | 
 | 	vaesenc	xmm12,xmm12,xmm4 | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[112+r8] | 
 | 	vaesenc	xmm7,xmm7,xmm4 | 
 | 	vaesenc	xmm8,xmm8,xmm4 | 
 | 	vaesenc	xmm9,xmm9,xmm4 | 
 | 	vaesenc	xmm10,xmm10,xmm4 | 
 | 	vaesenc	xmm11,xmm11,xmm4 | 
 | 	vaesenc	xmm12,xmm12,xmm4 | 
 |  | 
 |  | 
 | 	vmovdqa	xmm6,XMMWORD[((80-32))+rax] | 
 | 	vpxor	xmm6,xmm6,xmm0 | 
 | 	vmovdqu	xmm5,XMMWORD[((80-32))+rcx] | 
 |  | 
 | 	vpclmulqdq	xmm4,xmm6,xmm5,0x01 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm5,0x11 | 
 | 	vpxor	xmm2,xmm2,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm5,0x00 | 
 | 	vpxor	xmm3,xmm3,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm5,0x10 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[128+r8] | 
 | 	vaesenc	xmm7,xmm7,xmm4 | 
 | 	vaesenc	xmm8,xmm8,xmm4 | 
 | 	vaesenc	xmm9,xmm9,xmm4 | 
 | 	vaesenc	xmm10,xmm10,xmm4 | 
 | 	vaesenc	xmm11,xmm11,xmm4 | 
 | 	vaesenc	xmm12,xmm12,xmm4 | 
 |  | 
 |  | 
 | 	vpsrldq	xmm4,xmm1,8 | 
 | 	vpxor	xmm5,xmm2,xmm4 | 
 | 	vpslldq	xmm4,xmm1,8 | 
 | 	vpxor	xmm0,xmm3,xmm4 | 
 |  | 
 | 	vmovdqa	xmm3,XMMWORD[poly] | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[144+r8] | 
 | 	vaesenc	xmm7,xmm7,xmm4 | 
 | 	vaesenc	xmm8,xmm8,xmm4 | 
 | 	vaesenc	xmm9,xmm9,xmm4 | 
 | 	vaesenc	xmm10,xmm10,xmm4 | 
 | 	vaesenc	xmm11,xmm11,xmm4 | 
 | 	vaesenc	xmm12,xmm12,xmm4 | 
 |  | 
 | 	vmovdqu	xmm6,XMMWORD[160+r8] | 
 | 	vpalignr	xmm2,xmm0,xmm0,8 | 
 | 	vpclmulqdq	xmm0,xmm0,xmm3,0x10 | 
 | 	vpxor	xmm0,xmm2,xmm0 | 
 |  | 
 | 	vpxor	xmm4,xmm6,XMMWORD[rdi] | 
 | 	vaesenclast	xmm7,xmm7,xmm4 | 
 | 	vpxor	xmm4,xmm6,XMMWORD[16+rdi] | 
 | 	vaesenclast	xmm8,xmm8,xmm4 | 
 | 	vpxor	xmm4,xmm6,XMMWORD[32+rdi] | 
 | 	vaesenclast	xmm9,xmm9,xmm4 | 
 | 	vpxor	xmm4,xmm6,XMMWORD[48+rdi] | 
 | 	vaesenclast	xmm10,xmm10,xmm4 | 
 | 	vpxor	xmm4,xmm6,XMMWORD[64+rdi] | 
 | 	vaesenclast	xmm11,xmm11,xmm4 | 
 | 	vpxor	xmm4,xmm6,XMMWORD[80+rdi] | 
 | 	vaesenclast	xmm12,xmm12,xmm4 | 
 |  | 
 | 	vpalignr	xmm2,xmm0,xmm0,8 | 
 | 	vpclmulqdq	xmm0,xmm0,xmm3,0x10 | 
 | 	vpxor	xmm0,xmm2,xmm0 | 
 |  | 
 | 	vmovdqu	XMMWORD[rsi],xmm7 | 
 | 	vmovdqu	XMMWORD[16+rsi],xmm8 | 
 | 	vmovdqu	XMMWORD[32+rsi],xmm9 | 
 | 	vmovdqu	XMMWORD[48+rsi],xmm10 | 
 | 	vmovdqu	XMMWORD[64+rsi],xmm11 | 
 | 	vmovdqu	XMMWORD[80+rsi],xmm12 | 
 |  | 
 | 	vpxor	xmm0,xmm0,xmm5 | 
 |  | 
 | 	lea	rdi,[96+rdi] | 
 | 	lea	rsi,[96+rsi] | 
 | 	jmp	NEAR $L$128_dec_loop1 | 
 |  | 
 | $L$128_dec_finish_96: | 
 | 	vmovdqa	xmm6,xmm12 | 
 | 	vmovdqa	XMMWORD[(16-32)+rax],xmm11 | 
 | 	vmovdqa	XMMWORD[(32-32)+rax],xmm10 | 
 | 	vmovdqa	XMMWORD[(48-32)+rax],xmm9 | 
 | 	vmovdqa	XMMWORD[(64-32)+rax],xmm8 | 
 | 	vmovdqa	XMMWORD[(80-32)+rax],xmm7 | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[((0-32))+rcx] | 
 | 	vpclmulqdq	xmm1,xmm6,xmm4,0x10 | 
 | 	vpclmulqdq	xmm2,xmm6,xmm4,0x11 | 
 | 	vpclmulqdq	xmm3,xmm6,xmm4,0x00 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm4,0x01 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 |  | 
 | 	vmovdqu	xmm6,XMMWORD[((-16))+rax] | 
 | 	vmovdqu	xmm13,XMMWORD[((-16))+rcx] | 
 |  | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x10 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x11 | 
 | 	vpxor	xmm2,xmm2,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x00 | 
 | 	vpxor	xmm3,xmm3,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x01 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 |  | 
 | 	vmovdqu	xmm6,XMMWORD[rax] | 
 | 	vmovdqu	xmm13,XMMWORD[rcx] | 
 |  | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x10 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x11 | 
 | 	vpxor	xmm2,xmm2,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x00 | 
 | 	vpxor	xmm3,xmm3,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x01 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 |  | 
 | 	vmovdqu	xmm6,XMMWORD[16+rax] | 
 | 	vmovdqu	xmm13,XMMWORD[16+rcx] | 
 |  | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x10 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x11 | 
 | 	vpxor	xmm2,xmm2,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x00 | 
 | 	vpxor	xmm3,xmm3,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x01 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 |  | 
 | 	vmovdqu	xmm6,XMMWORD[32+rax] | 
 | 	vmovdqu	xmm13,XMMWORD[32+rcx] | 
 |  | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x10 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x11 | 
 | 	vpxor	xmm2,xmm2,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x00 | 
 | 	vpxor	xmm3,xmm3,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x01 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 |  | 
 |  | 
 | 	vmovdqu	xmm6,XMMWORD[((80-32))+rax] | 
 | 	vpxor	xmm6,xmm6,xmm0 | 
 | 	vmovdqu	xmm5,XMMWORD[((80-32))+rcx] | 
 | 	vpclmulqdq	xmm4,xmm6,xmm5,0x11 | 
 | 	vpxor	xmm2,xmm2,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm5,0x00 | 
 | 	vpxor	xmm3,xmm3,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm5,0x10 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm5,0x01 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 |  | 
 | 	vpsrldq	xmm4,xmm1,8 | 
 | 	vpxor	xmm5,xmm2,xmm4 | 
 | 	vpslldq	xmm4,xmm1,8 | 
 | 	vpxor	xmm0,xmm3,xmm4 | 
 |  | 
 | 	vmovdqa	xmm3,XMMWORD[poly] | 
 |  | 
 | 	vpalignr	xmm2,xmm0,xmm0,8 | 
 | 	vpclmulqdq	xmm0,xmm0,xmm3,0x10 | 
 | 	vpxor	xmm0,xmm2,xmm0 | 
 |  | 
 | 	vpalignr	xmm2,xmm0,xmm0,8 | 
 | 	vpclmulqdq	xmm0,xmm0,xmm3,0x10 | 
 | 	vpxor	xmm0,xmm2,xmm0 | 
 |  | 
 | 	vpxor	xmm0,xmm0,xmm5 | 
 |  | 
 | $L$128_dec_loop2: | 
 |  | 
 |  | 
 |  | 
 | 	cmp	r9,16 | 
 | 	jb	NEAR $L$128_dec_out | 
 | 	sub	r9,16 | 
 |  | 
 | 	vmovdqa	xmm2,xmm15 | 
 | 	vpaddd	xmm15,xmm15,XMMWORD[one] | 
 |  | 
 | 	vpxor	xmm2,xmm2,XMMWORD[r8] | 
 | 	vaesenc	xmm2,xmm2,XMMWORD[16+r8] | 
 | 	vaesenc	xmm2,xmm2,XMMWORD[32+r8] | 
 | 	vaesenc	xmm2,xmm2,XMMWORD[48+r8] | 
 | 	vaesenc	xmm2,xmm2,XMMWORD[64+r8] | 
 | 	vaesenc	xmm2,xmm2,XMMWORD[80+r8] | 
 | 	vaesenc	xmm2,xmm2,XMMWORD[96+r8] | 
 | 	vaesenc	xmm2,xmm2,XMMWORD[112+r8] | 
 | 	vaesenc	xmm2,xmm2,XMMWORD[128+r8] | 
 | 	vaesenc	xmm2,xmm2,XMMWORD[144+r8] | 
 | 	vaesenclast	xmm2,xmm2,XMMWORD[160+r8] | 
 | 	vpxor	xmm2,xmm2,XMMWORD[rdi] | 
 | 	vmovdqu	XMMWORD[rsi],xmm2 | 
 | 	add	rdi,16 | 
 | 	add	rsi,16 | 
 |  | 
 | 	vpxor	xmm0,xmm0,xmm2 | 
 | 	vmovdqa	xmm1,XMMWORD[((-32))+rcx] | 
 | 	call	GFMUL | 
 |  | 
 | 	jmp	NEAR $L$128_dec_loop2 | 
 |  | 
 | $L$128_dec_out: | 
 | 	vmovdqu	XMMWORD[rdx],xmm0 | 
 | 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue | 
 | 	mov	rsi,QWORD[16+rsp] | 
 | 	ret | 
 |  | 
 | $L$SEH_end_aes128gcmsiv_dec: | 
 | global	aes128gcmsiv_ecb_enc_block | 
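; aes128gcmsiv_ecb_enc_block: encrypt the single 16-byte block at [arg1 = rdi]
; with the 11 AES-128 round keys at [arg3 = rdx]; write the result to
; [arg2 = rsi].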
 |  | 
 | ALIGN	16 | 
 | aes128gcmsiv_ecb_enc_block: | 
 | 	mov	QWORD[8+rsp],rdi	;WIN64 prologue | 
 | 	mov	QWORD[16+rsp],rsi | 
 | 	mov	rax,rsp | 
 | $L$SEH_begin_aes128gcmsiv_ecb_enc_block: | 
 | 	mov	rdi,rcx | 
 | 	mov	rsi,rdx | 
 | 	mov	rdx,r8 | 
 |  | 
 |  | 
 |  | 
 | _CET_ENDBR | 
 | 	vmovdqa	xmm1,XMMWORD[rdi] | 
 |  | 
 | 	vpxor	xmm1,xmm1,XMMWORD[rdx] | 
 | 	vaesenc	xmm1,xmm1,XMMWORD[16+rdx] | 
 | 	vaesenc	xmm1,xmm1,XMMWORD[32+rdx] | 
 | 	vaesenc	xmm1,xmm1,XMMWORD[48+rdx] | 
 | 	vaesenc	xmm1,xmm1,XMMWORD[64+rdx] | 
 | 	vaesenc	xmm1,xmm1,XMMWORD[80+rdx] | 
 | 	vaesenc	xmm1,xmm1,XMMWORD[96+rdx] | 
 | 	vaesenc	xmm1,xmm1,XMMWORD[112+rdx] | 
 | 	vaesenc	xmm1,xmm1,XMMWORD[128+rdx] | 
 | 	vaesenc	xmm1,xmm1,XMMWORD[144+rdx] | 
 | 	vaesenclast	xmm1,xmm1,XMMWORD[160+rdx] | 
 |  | 
 | 	vmovdqa	XMMWORD[rsi],xmm1 | 
 |  | 
 | 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue | 
 | 	mov	rsi,QWORD[16+rsp] | 
 | 	ret | 
 |  | 
 | $L$SEH_end_aes128gcmsiv_ecb_enc_block: | 
 | global	aes256gcmsiv_aes_ks_enc_x1 | 
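; aes256gcmsiv_aes_ks_enc_x1: expand the 32-byte key at [arg4 = rcx] into the
; 15 round keys (240 bytes) at [arg3 = rdx] while encrypting the single block
; at [arg1 = rdi]; the ciphertext block is written to [arg2 = rsi].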
 |  | 
 | ALIGN	16 | 
 | aes256gcmsiv_aes_ks_enc_x1: | 
 | 	mov	QWORD[8+rsp],rdi	;WIN64 prologue | 
 | 	mov	QWORD[16+rsp],rsi | 
 | 	mov	rax,rsp | 
 | $L$SEH_begin_aes256gcmsiv_aes_ks_enc_x1: | 
 | 	mov	rdi,rcx | 
 | 	mov	rsi,rdx | 
 | 	mov	rdx,r8 | 
 | 	mov	rcx,r9 | 
 |  | 
 |  | 
 |  | 
 | _CET_ENDBR | 
 | 	vmovdqa	xmm0,XMMWORD[con1] | 
 | 	vmovdqa	xmm15,XMMWORD[mask] | 
 | 	vmovdqa	xmm8,XMMWORD[rdi] | 
 | 	vmovdqa	xmm1,XMMWORD[rcx] | 
 | 	vmovdqa	xmm3,XMMWORD[16+rcx] | 
 | 	vpxor	xmm8,xmm8,xmm1 | 
 | 	vaesenc	xmm8,xmm8,xmm3 | 
 | 	vmovdqu	XMMWORD[rdx],xmm1 | 
 | 	vmovdqu	XMMWORD[16+rdx],xmm3 | 
 | 	vpxor	xmm14,xmm14,xmm14 | 
 |  | 
 | 	vpshufb	xmm2,xmm3,xmm15 | 
 | 	vaesenclast	xmm2,xmm2,xmm0 | 
 | 	vpslld	xmm0,xmm0,1 | 
 | 	vpslldq	xmm4,xmm1,4 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 | 	vpslldq	xmm4,xmm4,4 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 | 	vpslldq	xmm4,xmm4,4 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 | 	vpxor	xmm1,xmm1,xmm2 | 
 | 	vaesenc	xmm8,xmm8,xmm1 | 
 | 	vmovdqu	XMMWORD[32+rdx],xmm1 | 
 |  | 
 | 	vpshufd	xmm2,xmm1,0xff | 
 | 	vaesenclast	xmm2,xmm2,xmm14 | 
 | 	vpslldq	xmm4,xmm3,4 | 
 | 	vpxor	xmm3,xmm3,xmm4 | 
 | 	vpslldq	xmm4,xmm4,4 | 
 | 	vpxor	xmm3,xmm3,xmm4 | 
 | 	vpslldq	xmm4,xmm4,4 | 
 | 	vpxor	xmm3,xmm3,xmm4 | 
 | 	vpxor	xmm3,xmm3,xmm2 | 
 | 	vaesenc	xmm8,xmm8,xmm3 | 
 | 	vmovdqu	XMMWORD[48+rdx],xmm3 | 
 |  | 
 | 	vpshufb	xmm2,xmm3,xmm15 | 
 | 	vaesenclast	xmm2,xmm2,xmm0 | 
 | 	vpslld	xmm0,xmm0,1 | 
 | 	vpslldq	xmm4,xmm1,4 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 | 	vpslldq	xmm4,xmm4,4 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 | 	vpslldq	xmm4,xmm4,4 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 | 	vpxor	xmm1,xmm1,xmm2 | 
 | 	vaesenc	xmm8,xmm8,xmm1 | 
 | 	vmovdqu	XMMWORD[64+rdx],xmm1 | 
 |  | 
 | 	vpshufd	xmm2,xmm1,0xff | 
 | 	vaesenclast	xmm2,xmm2,xmm14 | 
 | 	vpslldq	xmm4,xmm3,4 | 
 | 	vpxor	xmm3,xmm3,xmm4 | 
 | 	vpslldq	xmm4,xmm4,4 | 
 | 	vpxor	xmm3,xmm3,xmm4 | 
 | 	vpslldq	xmm4,xmm4,4 | 
 | 	vpxor	xmm3,xmm3,xmm4 | 
 | 	vpxor	xmm3,xmm3,xmm2 | 
 | 	vaesenc	xmm8,xmm8,xmm3 | 
 | 	vmovdqu	XMMWORD[80+rdx],xmm3 | 
 |  | 
 | 	vpshufb	xmm2,xmm3,xmm15 | 
 | 	vaesenclast	xmm2,xmm2,xmm0 | 
 | 	vpslld	xmm0,xmm0,1 | 
 | 	vpslldq	xmm4,xmm1,4 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 | 	vpslldq	xmm4,xmm4,4 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 | 	vpslldq	xmm4,xmm4,4 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 | 	vpxor	xmm1,xmm1,xmm2 | 
 | 	vaesenc	xmm8,xmm8,xmm1 | 
 | 	vmovdqu	XMMWORD[96+rdx],xmm1 | 
 |  | 
 | 	vpshufd	xmm2,xmm1,0xff | 
 | 	vaesenclast	xmm2,xmm2,xmm14 | 
 | 	vpslldq	xmm4,xmm3,4 | 
 | 	vpxor	xmm3,xmm3,xmm4 | 
 | 	vpslldq	xmm4,xmm4,4 | 
 | 	vpxor	xmm3,xmm3,xmm4 | 
 | 	vpslldq	xmm4,xmm4,4 | 
 | 	vpxor	xmm3,xmm3,xmm4 | 
 | 	vpxor	xmm3,xmm3,xmm2 | 
 | 	vaesenc	xmm8,xmm8,xmm3 | 
 | 	vmovdqu	XMMWORD[112+rdx],xmm3 | 
 |  | 
 | 	vpshufb	xmm2,xmm3,xmm15 | 
 | 	vaesenclast	xmm2,xmm2,xmm0 | 
 | 	vpslld	xmm0,xmm0,1 | 
 | 	vpslldq	xmm4,xmm1,4 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 | 	vpslldq	xmm4,xmm4,4 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 | 	vpslldq	xmm4,xmm4,4 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 | 	vpxor	xmm1,xmm1,xmm2 | 
 | 	vaesenc	xmm8,xmm8,xmm1 | 
 | 	vmovdqu	XMMWORD[128+rdx],xmm1 | 
 |  | 
 | 	vpshufd	xmm2,xmm1,0xff | 
 | 	vaesenclast	xmm2,xmm2,xmm14 | 
 | 	vpslldq	xmm4,xmm3,4 | 
 | 	vpxor	xmm3,xmm3,xmm4 | 
 | 	vpslldq	xmm4,xmm4,4 | 
 | 	vpxor	xmm3,xmm3,xmm4 | 
 | 	vpslldq	xmm4,xmm4,4 | 
 | 	vpxor	xmm3,xmm3,xmm4 | 
 | 	vpxor	xmm3,xmm3,xmm2 | 
 | 	vaesenc	xmm8,xmm8,xmm3 | 
 | 	vmovdqu	XMMWORD[144+rdx],xmm3 | 
 |  | 
 | 	vpshufb	xmm2,xmm3,xmm15 | 
 | 	vaesenclast	xmm2,xmm2,xmm0 | 
 | 	vpslld	xmm0,xmm0,1 | 
 | 	vpslldq	xmm4,xmm1,4 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 | 	vpslldq	xmm4,xmm4,4 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 | 	vpslldq	xmm4,xmm4,4 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 | 	vpxor	xmm1,xmm1,xmm2 | 
 | 	vaesenc	xmm8,xmm8,xmm1 | 
 | 	vmovdqu	XMMWORD[160+rdx],xmm1 | 
 |  | 
 | 	vpshufd	xmm2,xmm1,0xff | 
 | 	vaesenclast	xmm2,xmm2,xmm14 | 
 | 	vpslldq	xmm4,xmm3,4 | 
 | 	vpxor	xmm3,xmm3,xmm4 | 
 | 	vpslldq	xmm4,xmm4,4 | 
 | 	vpxor	xmm3,xmm3,xmm4 | 
 | 	vpslldq	xmm4,xmm4,4 | 
 | 	vpxor	xmm3,xmm3,xmm4 | 
 | 	vpxor	xmm3,xmm3,xmm2 | 
 | 	vaesenc	xmm8,xmm8,xmm3 | 
 | 	vmovdqu	XMMWORD[176+rdx],xmm3 | 
 |  | 
 | 	vpshufb	xmm2,xmm3,xmm15 | 
 | 	vaesenclast	xmm2,xmm2,xmm0 | 
 | 	vpslld	xmm0,xmm0,1 | 
 | 	vpslldq	xmm4,xmm1,4 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 | 	vpslldq	xmm4,xmm4,4 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 | 	vpslldq	xmm4,xmm4,4 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 | 	vpxor	xmm1,xmm1,xmm2 | 
 | 	vaesenc	xmm8,xmm8,xmm1 | 
 | 	vmovdqu	XMMWORD[192+rdx],xmm1 | 
 |  | 
 | 	vpshufd	xmm2,xmm1,0xff | 
 | 	vaesenclast	xmm2,xmm2,xmm14 | 
 | 	vpslldq	xmm4,xmm3,4 | 
 | 	vpxor	xmm3,xmm3,xmm4 | 
 | 	vpslldq	xmm4,xmm4,4 | 
 | 	vpxor	xmm3,xmm3,xmm4 | 
 | 	vpslldq	xmm4,xmm4,4 | 
 | 	vpxor	xmm3,xmm3,xmm4 | 
 | 	vpxor	xmm3,xmm3,xmm2 | 
 | 	vaesenc	xmm8,xmm8,xmm3 | 
 | 	vmovdqu	XMMWORD[208+rdx],xmm3 | 
 |  | 
 | 	vpshufb	xmm2,xmm3,xmm15 | 
 | 	vaesenclast	xmm2,xmm2,xmm0 | 
 | 	vpslldq	xmm4,xmm1,4 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 | 	vpslldq	xmm4,xmm4,4 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 | 	vpslldq	xmm4,xmm4,4 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 | 	vpxor	xmm1,xmm1,xmm2 | 
 | 	vaesenclast	xmm8,xmm8,xmm1 | 
 | 	vmovdqu	XMMWORD[224+rdx],xmm1 | 
 |  | 
 | 	vmovdqa	XMMWORD[rsi],xmm8 | 
 | 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue | 
 | 	mov	rsi,QWORD[16+rsp] | 
 | 	ret | 
 |  | 
 | $L$SEH_end_aes256gcmsiv_aes_ks_enc_x1: | 
 | global	aes256gcmsiv_ecb_enc_block | 
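; aes256gcmsiv_ecb_enc_block: encrypt the single 16-byte block at [arg1 = rdi]
; with the 15 AES-256 round keys at [arg3 = rdx]; write the result to
; [arg2 = rsi].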
 |  | 
 | ALIGN	16 | 
 | aes256gcmsiv_ecb_enc_block: | 
 | 	mov	QWORD[8+rsp],rdi	;WIN64 prologue | 
 | 	mov	QWORD[16+rsp],rsi | 
 | 	mov	rax,rsp | 
 | $L$SEH_begin_aes256gcmsiv_ecb_enc_block: | 
 | 	mov	rdi,rcx | 
 | 	mov	rsi,rdx | 
 | 	mov	rdx,r8 | 
 |  | 
 |  | 
 |  | 
 | _CET_ENDBR | 
 | 	vmovdqa	xmm1,XMMWORD[rdi] | 
 | 	vpxor	xmm1,xmm1,XMMWORD[rdx] | 
 | 	vaesenc	xmm1,xmm1,XMMWORD[16+rdx] | 
 | 	vaesenc	xmm1,xmm1,XMMWORD[32+rdx] | 
 | 	vaesenc	xmm1,xmm1,XMMWORD[48+rdx] | 
 | 	vaesenc	xmm1,xmm1,XMMWORD[64+rdx] | 
 | 	vaesenc	xmm1,xmm1,XMMWORD[80+rdx] | 
 | 	vaesenc	xmm1,xmm1,XMMWORD[96+rdx] | 
 | 	vaesenc	xmm1,xmm1,XMMWORD[112+rdx] | 
 | 	vaesenc	xmm1,xmm1,XMMWORD[128+rdx] | 
 | 	vaesenc	xmm1,xmm1,XMMWORD[144+rdx] | 
 | 	vaesenc	xmm1,xmm1,XMMWORD[160+rdx] | 
 | 	vaesenc	xmm1,xmm1,XMMWORD[176+rdx] | 
 | 	vaesenc	xmm1,xmm1,XMMWORD[192+rdx] | 
 | 	vaesenc	xmm1,xmm1,XMMWORD[208+rdx] | 
 | 	vaesenclast	xmm1,xmm1,XMMWORD[224+rdx] | 
 | 	vmovdqa	XMMWORD[rsi],xmm1 | 
 | 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue | 
 | 	mov	rsi,QWORD[16+rsp] | 
 | 	ret | 
 |  | 
 | $L$SEH_end_aes256gcmsiv_ecb_enc_block: | 
 | global	aes256gcmsiv_enc_msg_x4 | 
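; aes256gcmsiv_enc_msg_x4: the AES-256 (14-round) counterpart of
; aes128gcmsiv_enc_msg_x4.  If arg5 = r8 is not a multiple of 16, the block
; count is rounded up, so the final partial block is processed as a full
; 16-byte block.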
 |  | 
 | ALIGN	16 | 
 | aes256gcmsiv_enc_msg_x4: | 
 | 	mov	QWORD[8+rsp],rdi	;WIN64 prologue | 
 | 	mov	QWORD[16+rsp],rsi | 
 | 	mov	rax,rsp | 
 | $L$SEH_begin_aes256gcmsiv_enc_msg_x4: | 
 | 	mov	rdi,rcx | 
 | 	mov	rsi,rdx | 
 | 	mov	rdx,r8 | 
 | 	mov	rcx,r9 | 
 | 	mov	r8,QWORD[40+rsp] | 
 |  | 
 |  | 
 |  | 
 | _CET_ENDBR | 
 | 	test	r8,r8 | 
 | 	jnz	NEAR $L$256_enc_msg_x4_start | 
 | 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue | 
 | 	mov	rsi,QWORD[16+rsp] | 
 | 	ret | 
 |  | 
 | $L$256_enc_msg_x4_start: | 
 | 	mov	r10,r8 | 
 | 	shr	r8,4 | 
 | 	shl	r10,60 | 
 | 	jz	NEAR $L$256_enc_msg_x4_start2 | 
 | 	add	r8,1 | 
 |  | 
 | $L$256_enc_msg_x4_start2: | 
 | 	mov	r10,r8 | 
 | 	shl	r10,62 | 
 | 	shr	r10,62 | 
 |  | 
 |  | 
 | 	vmovdqa	xmm15,XMMWORD[rdx] | 
 | 	vpor	xmm15,xmm15,XMMWORD[OR_MASK] | 
 |  | 
 | 	vmovdqa	xmm4,XMMWORD[four] | 
 | 	vmovdqa	xmm0,xmm15 | 
 | 	vpaddd	xmm1,xmm15,XMMWORD[one] | 
 | 	vpaddd	xmm2,xmm15,XMMWORD[two] | 
 | 	vpaddd	xmm3,xmm15,XMMWORD[three] | 
 |  | 
 | 	shr	r8,2 | 
 | 	je	NEAR $L$256_enc_msg_x4_check_remainder | 
 |  | 
 | 	sub	rsi,64 | 
 | 	sub	rdi,64 | 
 |  | 
 | $L$256_enc_msg_x4_loop1: | 
 | 	add	rsi,64 | 
 | 	add	rdi,64 | 
 |  | 
 | 	vmovdqa	xmm5,xmm0 | 
 | 	vmovdqa	xmm6,xmm1 | 
 | 	vmovdqa	xmm7,xmm2 | 
 | 	vmovdqa	xmm8,xmm3 | 
 |  | 
 | 	vpxor	xmm5,xmm5,XMMWORD[rcx] | 
 | 	vpxor	xmm6,xmm6,XMMWORD[rcx] | 
 | 	vpxor	xmm7,xmm7,XMMWORD[rcx] | 
 | 	vpxor	xmm8,xmm8,XMMWORD[rcx] | 
 |  | 
 | 	vmovdqu	xmm12,XMMWORD[16+rcx] | 
 | 	vaesenc	xmm5,xmm5,xmm12 | 
 | 	vaesenc	xmm6,xmm6,xmm12 | 
 | 	vaesenc	xmm7,xmm7,xmm12 | 
 | 	vaesenc	xmm8,xmm8,xmm12 | 
 |  | 
 | 	vpaddd	xmm0,xmm0,xmm4 | 
 | 	vmovdqu	xmm12,XMMWORD[32+rcx] | 
 | 	vaesenc	xmm5,xmm5,xmm12 | 
 | 	vaesenc	xmm6,xmm6,xmm12 | 
 | 	vaesenc	xmm7,xmm7,xmm12 | 
 | 	vaesenc	xmm8,xmm8,xmm12 | 
 |  | 
 | 	vpaddd	xmm1,xmm1,xmm4 | 
 | 	vmovdqu	xmm12,XMMWORD[48+rcx] | 
 | 	vaesenc	xmm5,xmm5,xmm12 | 
 | 	vaesenc	xmm6,xmm6,xmm12 | 
 | 	vaesenc	xmm7,xmm7,xmm12 | 
 | 	vaesenc	xmm8,xmm8,xmm12 | 
 |  | 
 | 	vpaddd	xmm2,xmm2,xmm4 | 
 | 	vmovdqu	xmm12,XMMWORD[64+rcx] | 
 | 	vaesenc	xmm5,xmm5,xmm12 | 
 | 	vaesenc	xmm6,xmm6,xmm12 | 
 | 	vaesenc	xmm7,xmm7,xmm12 | 
 | 	vaesenc	xmm8,xmm8,xmm12 | 
 |  | 
 | 	vpaddd	xmm3,xmm3,xmm4 | 
 |  | 
 | 	vmovdqu	xmm12,XMMWORD[80+rcx] | 
 | 	vaesenc	xmm5,xmm5,xmm12 | 
 | 	vaesenc	xmm6,xmm6,xmm12 | 
 | 	vaesenc	xmm7,xmm7,xmm12 | 
 | 	vaesenc	xmm8,xmm8,xmm12 | 
 |  | 
 | 	vmovdqu	xmm12,XMMWORD[96+rcx] | 
 | 	vaesenc	xmm5,xmm5,xmm12 | 
 | 	vaesenc	xmm6,xmm6,xmm12 | 
 | 	vaesenc	xmm7,xmm7,xmm12 | 
 | 	vaesenc	xmm8,xmm8,xmm12 | 
 |  | 
 | 	vmovdqu	xmm12,XMMWORD[112+rcx] | 
 | 	vaesenc	xmm5,xmm5,xmm12 | 
 | 	vaesenc	xmm6,xmm6,xmm12 | 
 | 	vaesenc	xmm7,xmm7,xmm12 | 
 | 	vaesenc	xmm8,xmm8,xmm12 | 
 |  | 
 | 	vmovdqu	xmm12,XMMWORD[128+rcx] | 
 | 	vaesenc	xmm5,xmm5,xmm12 | 
 | 	vaesenc	xmm6,xmm6,xmm12 | 
 | 	vaesenc	xmm7,xmm7,xmm12 | 
 | 	vaesenc	xmm8,xmm8,xmm12 | 
 |  | 
 | 	vmovdqu	xmm12,XMMWORD[144+rcx] | 
 | 	vaesenc	xmm5,xmm5,xmm12 | 
 | 	vaesenc	xmm6,xmm6,xmm12 | 
 | 	vaesenc	xmm7,xmm7,xmm12 | 
 | 	vaesenc	xmm8,xmm8,xmm12 | 
 |  | 
 | 	vmovdqu	xmm12,XMMWORD[160+rcx] | 
 | 	vaesenc	xmm5,xmm5,xmm12 | 
 | 	vaesenc	xmm6,xmm6,xmm12 | 
 | 	vaesenc	xmm7,xmm7,xmm12 | 
 | 	vaesenc	xmm8,xmm8,xmm12 | 
 |  | 
 | 	vmovdqu	xmm12,XMMWORD[176+rcx] | 
 | 	vaesenc	xmm5,xmm5,xmm12 | 
 | 	vaesenc	xmm6,xmm6,xmm12 | 
 | 	vaesenc	xmm7,xmm7,xmm12 | 
 | 	vaesenc	xmm8,xmm8,xmm12 | 
 |  | 
 | 	vmovdqu	xmm12,XMMWORD[192+rcx] | 
 | 	vaesenc	xmm5,xmm5,xmm12 | 
 | 	vaesenc	xmm6,xmm6,xmm12 | 
 | 	vaesenc	xmm7,xmm7,xmm12 | 
 | 	vaesenc	xmm8,xmm8,xmm12 | 
 |  | 
 | 	vmovdqu	xmm12,XMMWORD[208+rcx] | 
 | 	vaesenc	xmm5,xmm5,xmm12 | 
 | 	vaesenc	xmm6,xmm6,xmm12 | 
 | 	vaesenc	xmm7,xmm7,xmm12 | 
 | 	vaesenc	xmm8,xmm8,xmm12 | 
 |  | 
 | 	vmovdqu	xmm12,XMMWORD[224+rcx] | 
 | 	vaesenclast	xmm5,xmm5,xmm12 | 
 | 	vaesenclast	xmm6,xmm6,xmm12 | 
 | 	vaesenclast	xmm7,xmm7,xmm12 | 
 | 	vaesenclast	xmm8,xmm8,xmm12 | 
 |  | 
 |  | 
 |  | 
 | 	vpxor	xmm5,xmm5,XMMWORD[rdi] | 
 | 	vpxor	xmm6,xmm6,XMMWORD[16+rdi] | 
 | 	vpxor	xmm7,xmm7,XMMWORD[32+rdi] | 
 | 	vpxor	xmm8,xmm8,XMMWORD[48+rdi] | 
 |  | 
 | 	sub	r8,1 | 
 |  | 
 | 	vmovdqu	XMMWORD[rsi],xmm5 | 
 | 	vmovdqu	XMMWORD[16+rsi],xmm6 | 
 | 	vmovdqu	XMMWORD[32+rsi],xmm7 | 
 | 	vmovdqu	XMMWORD[48+rsi],xmm8 | 
 |  | 
 | 	jne	NEAR $L$256_enc_msg_x4_loop1 | 
 |  | 
 | 	add	rsi,64 | 
 | 	add	rdi,64 | 
 |  | 
 | $L$256_enc_msg_x4_check_remainder: | 
 | 	cmp	r10,0 | 
 | 	je	NEAR $L$256_enc_msg_x4_out | 
 |  | 
 | $L$256_enc_msg_x4_loop2: | 
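; Tail: encrypt one counter block at a time for the r10 remaining blocks.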
 |  | 
 |  | 
 |  | 
 | 	vmovdqa	xmm5,xmm0 | 
 | 	vpaddd	xmm0,xmm0,XMMWORD[one] | 
 | 	vpxor	xmm5,xmm5,XMMWORD[rcx] | 
 | 	vaesenc	xmm5,xmm5,XMMWORD[16+rcx] | 
 | 	vaesenc	xmm5,xmm5,XMMWORD[32+rcx] | 
 | 	vaesenc	xmm5,xmm5,XMMWORD[48+rcx] | 
 | 	vaesenc	xmm5,xmm5,XMMWORD[64+rcx] | 
 | 	vaesenc	xmm5,xmm5,XMMWORD[80+rcx] | 
 | 	vaesenc	xmm5,xmm5,XMMWORD[96+rcx] | 
 | 	vaesenc	xmm5,xmm5,XMMWORD[112+rcx] | 
 | 	vaesenc	xmm5,xmm5,XMMWORD[128+rcx] | 
 | 	vaesenc	xmm5,xmm5,XMMWORD[144+rcx] | 
 | 	vaesenc	xmm5,xmm5,XMMWORD[160+rcx] | 
 | 	vaesenc	xmm5,xmm5,XMMWORD[176+rcx] | 
 | 	vaesenc	xmm5,xmm5,XMMWORD[192+rcx] | 
 | 	vaesenc	xmm5,xmm5,XMMWORD[208+rcx] | 
 | 	vaesenclast	xmm5,xmm5,XMMWORD[224+rcx] | 
 |  | 
 |  | 
 | 	vpxor	xmm5,xmm5,XMMWORD[rdi] | 
 |  | 
 | 	vmovdqu	XMMWORD[rsi],xmm5 | 
 |  | 
 | 	add	rdi,16 | 
 | 	add	rsi,16 | 
 |  | 
 | 	sub	r10,1 | 
 | 	jne	NEAR $L$256_enc_msg_x4_loop2 | 
 |  | 
 | $L$256_enc_msg_x4_out: | 
 | 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue | 
 | 	mov	rsi,QWORD[16+rsp] | 
 | 	ret | 
 |  | 
 | $L$SEH_end_aes256gcmsiv_enc_msg_x4: | 
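; aes256gcmsiv_enc_msg_x8: the same counter-mode encryption as the x4
; variant above, but eight blocks per iteration.  Register roles after the
; Win64 shim are identical (rdi in, rsi out, rdx counter, rcx round keys,
; r8 length in bytes).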
 | global	aes256gcmsiv_enc_msg_x8 | 
 |  | 
 | ALIGN	16 | 
 | aes256gcmsiv_enc_msg_x8: | 
 | 	mov	QWORD[8+rsp],rdi	;WIN64 prologue | 
 | 	mov	QWORD[16+rsp],rsi | 
 | 	mov	rax,rsp | 
 | $L$SEH_begin_aes256gcmsiv_enc_msg_x8: | 
 | 	mov	rdi,rcx | 
 | 	mov	rsi,rdx | 
 | 	mov	rdx,r8 | 
 | 	mov	rcx,r9 | 
 | 	mov	r8,QWORD[40+rsp] | 
 |  | 
 |  | 
 |  | 
 | _CET_ENDBR | 
 | 	test	r8,r8 | 
 | 	jnz	NEAR $L$256_enc_msg_x8_start | 
 | 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue | 
 | 	mov	rsi,QWORD[16+rsp] | 
 | 	ret | 
 |  | 
 | $L$256_enc_msg_x8_start: | 
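; A 64-byte-aligned scratch slot below the stack pointer (r11) holds the
; eighth counter block; the other seven live in xmm0 and xmm9..xmm14.
; r8 becomes ceil(len / 16) blocks and r10 the blocks left over after the
; eight-wide loop (r8 mod 8).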
 |  | 
 | 	mov	r11,rsp | 
 | 	sub	r11,16 | 
 | 	and	r11,-64 | 
 |  | 
 | 	mov	r10,r8 | 
 | 	shr	r8,4 | 
 | 	shl	r10,60 | 
 | 	jz	NEAR $L$256_enc_msg_x8_start2 | 
 | 	add	r8,1 | 
 |  | 
 | $L$256_enc_msg_x8_start2: | 
 | 	mov	r10,r8 | 
 | 	shl	r10,61 | 
 | 	shr	r10,61 | 
 |  | 
 |  | 
 | 	vmovdqa	xmm1,XMMWORD[rdx] | 
 | 	vpor	xmm1,xmm1,XMMWORD[OR_MASK] | 
 |  | 
 |  | 
 | 	vpaddd	xmm0,xmm1,XMMWORD[seven] | 
 | 	vmovdqa	XMMWORD[r11],xmm0 | 
 | 	vpaddd	xmm9,xmm1,XMMWORD[one] | 
 | 	vpaddd	xmm10,xmm1,XMMWORD[two] | 
 | 	vpaddd	xmm11,xmm1,XMMWORD[three] | 
 | 	vpaddd	xmm12,xmm1,XMMWORD[four] | 
 | 	vpaddd	xmm13,xmm1,XMMWORD[five] | 
 | 	vpaddd	xmm14,xmm1,XMMWORD[six] | 
 | 	vmovdqa	xmm0,xmm1 | 
 |  | 
 | 	shr	r8,3 | 
 | 	jz	NEAR $L$256_enc_msg_x8_check_remainder | 
 |  | 
 | 	sub	rsi,128 | 
 | 	sub	rdi,128 | 
 |  | 
 | $L$256_enc_msg_x8_loop1: | 
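; Main loop: fourteen AES-256 rounds over eight counter blocks in parallel
; (xmm1..xmm8), advancing every counter by 8 via the `eight` constant, then
; XOR with eight input blocks and store 128 bytes of output.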
 | 	add	rsi,128 | 
 | 	add	rdi,128 | 
 |  | 
 | 	vmovdqa	xmm1,xmm0 | 
 | 	vmovdqa	xmm2,xmm9 | 
 | 	vmovdqa	xmm3,xmm10 | 
 | 	vmovdqa	xmm4,xmm11 | 
 | 	vmovdqa	xmm5,xmm12 | 
 | 	vmovdqa	xmm6,xmm13 | 
 | 	vmovdqa	xmm7,xmm14 | 
 |  | 
 | 	vmovdqa	xmm8,XMMWORD[r11] | 
 |  | 
 | 	vpxor	xmm1,xmm1,XMMWORD[rcx] | 
 | 	vpxor	xmm2,xmm2,XMMWORD[rcx] | 
 | 	vpxor	xmm3,xmm3,XMMWORD[rcx] | 
 | 	vpxor	xmm4,xmm4,XMMWORD[rcx] | 
 | 	vpxor	xmm5,xmm5,XMMWORD[rcx] | 
 | 	vpxor	xmm6,xmm6,XMMWORD[rcx] | 
 | 	vpxor	xmm7,xmm7,XMMWORD[rcx] | 
 | 	vpxor	xmm8,xmm8,XMMWORD[rcx] | 
 |  | 
 | 	vmovdqu	xmm15,XMMWORD[16+rcx] | 
 | 	vaesenc	xmm1,xmm1,xmm15 | 
 | 	vaesenc	xmm2,xmm2,xmm15 | 
 | 	vaesenc	xmm3,xmm3,xmm15 | 
 | 	vaesenc	xmm4,xmm4,xmm15 | 
 | 	vaesenc	xmm5,xmm5,xmm15 | 
 | 	vaesenc	xmm6,xmm6,xmm15 | 
 | 	vaesenc	xmm7,xmm7,xmm15 | 
 | 	vaesenc	xmm8,xmm8,xmm15 | 
 |  | 
 | 	vmovdqa	xmm14,XMMWORD[r11] | 
 | 	vpaddd	xmm14,xmm14,XMMWORD[eight] | 
 | 	vmovdqa	XMMWORD[r11],xmm14 | 
 | 	vmovdqu	xmm15,XMMWORD[32+rcx] | 
 | 	vaesenc	xmm1,xmm1,xmm15 | 
 | 	vaesenc	xmm2,xmm2,xmm15 | 
 | 	vaesenc	xmm3,xmm3,xmm15 | 
 | 	vaesenc	xmm4,xmm4,xmm15 | 
 | 	vaesenc	xmm5,xmm5,xmm15 | 
 | 	vaesenc	xmm6,xmm6,xmm15 | 
 | 	vaesenc	xmm7,xmm7,xmm15 | 
 | 	vaesenc	xmm8,xmm8,xmm15 | 
 |  | 
 | 	vpsubd	xmm14,xmm14,XMMWORD[one] | 
 | 	vmovdqu	xmm15,XMMWORD[48+rcx] | 
 | 	vaesenc	xmm1,xmm1,xmm15 | 
 | 	vaesenc	xmm2,xmm2,xmm15 | 
 | 	vaesenc	xmm3,xmm3,xmm15 | 
 | 	vaesenc	xmm4,xmm4,xmm15 | 
 | 	vaesenc	xmm5,xmm5,xmm15 | 
 | 	vaesenc	xmm6,xmm6,xmm15 | 
 | 	vaesenc	xmm7,xmm7,xmm15 | 
 | 	vaesenc	xmm8,xmm8,xmm15 | 
 |  | 
 | 	vpaddd	xmm0,xmm0,XMMWORD[eight] | 
 | 	vmovdqu	xmm15,XMMWORD[64+rcx] | 
 | 	vaesenc	xmm1,xmm1,xmm15 | 
 | 	vaesenc	xmm2,xmm2,xmm15 | 
 | 	vaesenc	xmm3,xmm3,xmm15 | 
 | 	vaesenc	xmm4,xmm4,xmm15 | 
 | 	vaesenc	xmm5,xmm5,xmm15 | 
 | 	vaesenc	xmm6,xmm6,xmm15 | 
 | 	vaesenc	xmm7,xmm7,xmm15 | 
 | 	vaesenc	xmm8,xmm8,xmm15 | 
 |  | 
 | 	vpaddd	xmm9,xmm9,XMMWORD[eight] | 
 | 	vmovdqu	xmm15,XMMWORD[80+rcx] | 
 | 	vaesenc	xmm1,xmm1,xmm15 | 
 | 	vaesenc	xmm2,xmm2,xmm15 | 
 | 	vaesenc	xmm3,xmm3,xmm15 | 
 | 	vaesenc	xmm4,xmm4,xmm15 | 
 | 	vaesenc	xmm5,xmm5,xmm15 | 
 | 	vaesenc	xmm6,xmm6,xmm15 | 
 | 	vaesenc	xmm7,xmm7,xmm15 | 
 | 	vaesenc	xmm8,xmm8,xmm15 | 
 |  | 
 | 	vpaddd	xmm10,xmm10,XMMWORD[eight] | 
 | 	vmovdqu	xmm15,XMMWORD[96+rcx] | 
 | 	vaesenc	xmm1,xmm1,xmm15 | 
 | 	vaesenc	xmm2,xmm2,xmm15 | 
 | 	vaesenc	xmm3,xmm3,xmm15 | 
 | 	vaesenc	xmm4,xmm4,xmm15 | 
 | 	vaesenc	xmm5,xmm5,xmm15 | 
 | 	vaesenc	xmm6,xmm6,xmm15 | 
 | 	vaesenc	xmm7,xmm7,xmm15 | 
 | 	vaesenc	xmm8,xmm8,xmm15 | 
 |  | 
 | 	vpaddd	xmm11,xmm11,XMMWORD[eight] | 
 | 	vmovdqu	xmm15,XMMWORD[112+rcx] | 
 | 	vaesenc	xmm1,xmm1,xmm15 | 
 | 	vaesenc	xmm2,xmm2,xmm15 | 
 | 	vaesenc	xmm3,xmm3,xmm15 | 
 | 	vaesenc	xmm4,xmm4,xmm15 | 
 | 	vaesenc	xmm5,xmm5,xmm15 | 
 | 	vaesenc	xmm6,xmm6,xmm15 | 
 | 	vaesenc	xmm7,xmm7,xmm15 | 
 | 	vaesenc	xmm8,xmm8,xmm15 | 
 |  | 
 | 	vpaddd	xmm12,xmm12,XMMWORD[eight] | 
 | 	vmovdqu	xmm15,XMMWORD[128+rcx] | 
 | 	vaesenc	xmm1,xmm1,xmm15 | 
 | 	vaesenc	xmm2,xmm2,xmm15 | 
 | 	vaesenc	xmm3,xmm3,xmm15 | 
 | 	vaesenc	xmm4,xmm4,xmm15 | 
 | 	vaesenc	xmm5,xmm5,xmm15 | 
 | 	vaesenc	xmm6,xmm6,xmm15 | 
 | 	vaesenc	xmm7,xmm7,xmm15 | 
 | 	vaesenc	xmm8,xmm8,xmm15 | 
 |  | 
 | 	vpaddd	xmm13,xmm13,XMMWORD[eight] | 
 | 	vmovdqu	xmm15,XMMWORD[144+rcx] | 
 | 	vaesenc	xmm1,xmm1,xmm15 | 
 | 	vaesenc	xmm2,xmm2,xmm15 | 
 | 	vaesenc	xmm3,xmm3,xmm15 | 
 | 	vaesenc	xmm4,xmm4,xmm15 | 
 | 	vaesenc	xmm5,xmm5,xmm15 | 
 | 	vaesenc	xmm6,xmm6,xmm15 | 
 | 	vaesenc	xmm7,xmm7,xmm15 | 
 | 	vaesenc	xmm8,xmm8,xmm15 | 
 |  | 
 | 	vmovdqu	xmm15,XMMWORD[160+rcx] | 
 | 	vaesenc	xmm1,xmm1,xmm15 | 
 | 	vaesenc	xmm2,xmm2,xmm15 | 
 | 	vaesenc	xmm3,xmm3,xmm15 | 
 | 	vaesenc	xmm4,xmm4,xmm15 | 
 | 	vaesenc	xmm5,xmm5,xmm15 | 
 | 	vaesenc	xmm6,xmm6,xmm15 | 
 | 	vaesenc	xmm7,xmm7,xmm15 | 
 | 	vaesenc	xmm8,xmm8,xmm15 | 
 |  | 
 | 	vmovdqu	xmm15,XMMWORD[176+rcx] | 
 | 	vaesenc	xmm1,xmm1,xmm15 | 
 | 	vaesenc	xmm2,xmm2,xmm15 | 
 | 	vaesenc	xmm3,xmm3,xmm15 | 
 | 	vaesenc	xmm4,xmm4,xmm15 | 
 | 	vaesenc	xmm5,xmm5,xmm15 | 
 | 	vaesenc	xmm6,xmm6,xmm15 | 
 | 	vaesenc	xmm7,xmm7,xmm15 | 
 | 	vaesenc	xmm8,xmm8,xmm15 | 
 |  | 
 | 	vmovdqu	xmm15,XMMWORD[192+rcx] | 
 | 	vaesenc	xmm1,xmm1,xmm15 | 
 | 	vaesenc	xmm2,xmm2,xmm15 | 
 | 	vaesenc	xmm3,xmm3,xmm15 | 
 | 	vaesenc	xmm4,xmm4,xmm15 | 
 | 	vaesenc	xmm5,xmm5,xmm15 | 
 | 	vaesenc	xmm6,xmm6,xmm15 | 
 | 	vaesenc	xmm7,xmm7,xmm15 | 
 | 	vaesenc	xmm8,xmm8,xmm15 | 
 |  | 
 | 	vmovdqu	xmm15,XMMWORD[208+rcx] | 
 | 	vaesenc	xmm1,xmm1,xmm15 | 
 | 	vaesenc	xmm2,xmm2,xmm15 | 
 | 	vaesenc	xmm3,xmm3,xmm15 | 
 | 	vaesenc	xmm4,xmm4,xmm15 | 
 | 	vaesenc	xmm5,xmm5,xmm15 | 
 | 	vaesenc	xmm6,xmm6,xmm15 | 
 | 	vaesenc	xmm7,xmm7,xmm15 | 
 | 	vaesenc	xmm8,xmm8,xmm15 | 
 |  | 
 | 	vmovdqu	xmm15,XMMWORD[224+rcx] | 
 | 	vaesenclast	xmm1,xmm1,xmm15 | 
 | 	vaesenclast	xmm2,xmm2,xmm15 | 
 | 	vaesenclast	xmm3,xmm3,xmm15 | 
 | 	vaesenclast	xmm4,xmm4,xmm15 | 
 | 	vaesenclast	xmm5,xmm5,xmm15 | 
 | 	vaesenclast	xmm6,xmm6,xmm15 | 
 | 	vaesenclast	xmm7,xmm7,xmm15 | 
 | 	vaesenclast	xmm8,xmm8,xmm15 | 
 |  | 
 |  | 
 |  | 
 | 	vpxor	xmm1,xmm1,XMMWORD[rdi] | 
 | 	vpxor	xmm2,xmm2,XMMWORD[16+rdi] | 
 | 	vpxor	xmm3,xmm3,XMMWORD[32+rdi] | 
 | 	vpxor	xmm4,xmm4,XMMWORD[48+rdi] | 
 | 	vpxor	xmm5,xmm5,XMMWORD[64+rdi] | 
 | 	vpxor	xmm6,xmm6,XMMWORD[80+rdi] | 
 | 	vpxor	xmm7,xmm7,XMMWORD[96+rdi] | 
 | 	vpxor	xmm8,xmm8,XMMWORD[112+rdi] | 
 |  | 
 | 	sub	r8,1 | 
 |  | 
 | 	vmovdqu	XMMWORD[rsi],xmm1 | 
 | 	vmovdqu	XMMWORD[16+rsi],xmm2 | 
 | 	vmovdqu	XMMWORD[32+rsi],xmm3 | 
 | 	vmovdqu	XMMWORD[48+rsi],xmm4 | 
 | 	vmovdqu	XMMWORD[64+rsi],xmm5 | 
 | 	vmovdqu	XMMWORD[80+rsi],xmm6 | 
 | 	vmovdqu	XMMWORD[96+rsi],xmm7 | 
 | 	vmovdqu	XMMWORD[112+rsi],xmm8 | 
 |  | 
 | 	jne	NEAR $L$256_enc_msg_x8_loop1 | 
 |  | 
 | 	add	rsi,128 | 
 | 	add	rdi,128 | 
 |  | 
 | $L$256_enc_msg_x8_check_remainder: | 
 | 	cmp	r10,0 | 
 | 	je	NEAR $L$256_enc_msg_x8_out | 
 |  | 
 | $L$256_enc_msg_x8_loop2: | 
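; Tail: one counter block at a time for the r10 remaining blocks.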
 |  | 
 |  | 
 | 	vmovdqa	xmm1,xmm0 | 
 | 	vpaddd	xmm0,xmm0,XMMWORD[one] | 
 |  | 
 | 	vpxor	xmm1,xmm1,XMMWORD[rcx] | 
 | 	vaesenc	xmm1,xmm1,XMMWORD[16+rcx] | 
 | 	vaesenc	xmm1,xmm1,XMMWORD[32+rcx] | 
 | 	vaesenc	xmm1,xmm1,XMMWORD[48+rcx] | 
 | 	vaesenc	xmm1,xmm1,XMMWORD[64+rcx] | 
 | 	vaesenc	xmm1,xmm1,XMMWORD[80+rcx] | 
 | 	vaesenc	xmm1,xmm1,XMMWORD[96+rcx] | 
 | 	vaesenc	xmm1,xmm1,XMMWORD[112+rcx] | 
 | 	vaesenc	xmm1,xmm1,XMMWORD[128+rcx] | 
 | 	vaesenc	xmm1,xmm1,XMMWORD[144+rcx] | 
 | 	vaesenc	xmm1,xmm1,XMMWORD[160+rcx] | 
 | 	vaesenc	xmm1,xmm1,XMMWORD[176+rcx] | 
 | 	vaesenc	xmm1,xmm1,XMMWORD[192+rcx] | 
 | 	vaesenc	xmm1,xmm1,XMMWORD[208+rcx] | 
 | 	vaesenclast	xmm1,xmm1,XMMWORD[224+rcx] | 
 |  | 
 |  | 
 | 	vpxor	xmm1,xmm1,XMMWORD[rdi] | 
 |  | 
 | 	vmovdqu	XMMWORD[rsi],xmm1 | 
 |  | 
 | 	add	rdi,16 | 
 | 	add	rsi,16 | 
 | 	sub	r10,1 | 
 | 	jnz	NEAR $L$256_enc_msg_x8_loop2 | 
 |  | 
 | $L$256_enc_msg_x8_out: | 
 | 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue | 
 | 	mov	rsi,QWORD[16+rsp] | 
 | 	ret | 
 |  | 
 |  | 
 | $L$SEH_end_aes256gcmsiv_enc_msg_x8: | 
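; aes256gcmsiv_dec: counter-mode decryption fused with the POLYVAL update
; over the resulting plaintext.  Register roles after the Win64 shim:
;   rdi = ciphertext in, rsi = plaintext out,
;   rdx = 16-byte POLYVAL accumulator followed by the initial counter block
;         (the accumulator is loaded at entry and stored back at exit; the
;         area from rdx+16 onward is reused as scratch for blocks awaiting
;         the POLYVAL update),
;   rcx = table of precomputed powers of the hash key (six 16-byte entries
;         are read),
;   r8  = expanded AES-256 key,
;   r9  = length in bytes, rounded down to a multiple of 16; inputs shorter
;         than one block return immediately.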
 | global	aes256gcmsiv_dec | 
 |  | 
 | ALIGN	16 | 
 | aes256gcmsiv_dec: | 
 | 	mov	QWORD[8+rsp],rdi	;WIN64 prologue | 
 | 	mov	QWORD[16+rsp],rsi | 
 | 	mov	rax,rsp | 
 | $L$SEH_begin_aes256gcmsiv_dec: | 
 | 	mov	rdi,rcx | 
 | 	mov	rsi,rdx | 
 | 	mov	rdx,r8 | 
 | 	mov	rcx,r9 | 
 | 	mov	r8,QWORD[40+rsp] | 
 | 	mov	r9,QWORD[48+rsp] | 
 |  | 
 |  | 
 |  | 
 | _CET_ENDBR | 
 | 	test	r9,~15 | 
 | 	jnz	NEAR $L$256_dec_start | 
 | 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue | 
 | 	mov	rsi,QWORD[16+rsp] | 
 | 	ret | 
 |  | 
 | $L$256_dec_start: | 
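; Layout: xmm0 carries the running POLYVAL value, xmm15 the next counter.
; Inputs of at least 96 bytes take a six-block software-pipelined path: the
; first six blocks are decrypted up front, then each pass of
; $L$256_dec_loop1 hashes the previous six plaintext blocks while the AES
; rounds for the next six counter blocks are in flight.
; $L$256_dec_finish_96 hashes the last six blocks, and $L$256_dec_loop2
; handles any remaining whole blocks one at a time.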
 | 	vzeroupper | 
 | 	vmovdqa	xmm0,XMMWORD[rdx] | 
 |  | 
 |  | 
 | 	vmovdqu	xmm15,XMMWORD[16+rdx] | 
 | 	vpor	xmm15,xmm15,XMMWORD[OR_MASK] | 
 | 	mov	rax,rdx | 
 |  | 
 | 	lea	rax,[32+rax] | 
 | 	lea	rcx,[32+rcx] | 
 |  | 
 | 	and	r9,~15 | 
 |  | 
 |  | 
 | 	cmp	r9,96 | 
 | 	jb	NEAR $L$256_dec_loop2 | 
 |  | 
 |  | 
 | 	sub	r9,96 | 
 | 	vmovdqa	xmm7,xmm15 | 
 | 	vpaddd	xmm8,xmm7,XMMWORD[one] | 
 | 	vpaddd	xmm9,xmm7,XMMWORD[two] | 
 | 	vpaddd	xmm10,xmm9,XMMWORD[one] | 
 | 	vpaddd	xmm11,xmm9,XMMWORD[two] | 
 | 	vpaddd	xmm12,xmm11,XMMWORD[one] | 
 | 	vpaddd	xmm15,xmm11,XMMWORD[two] | 
 |  | 
 | 	vpxor	xmm7,xmm7,XMMWORD[r8] | 
 | 	vpxor	xmm8,xmm8,XMMWORD[r8] | 
 | 	vpxor	xmm9,xmm9,XMMWORD[r8] | 
 | 	vpxor	xmm10,xmm10,XMMWORD[r8] | 
 | 	vpxor	xmm11,xmm11,XMMWORD[r8] | 
 | 	vpxor	xmm12,xmm12,XMMWORD[r8] | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[16+r8] | 
 | 	vaesenc	xmm7,xmm7,xmm4 | 
 | 	vaesenc	xmm8,xmm8,xmm4 | 
 | 	vaesenc	xmm9,xmm9,xmm4 | 
 | 	vaesenc	xmm10,xmm10,xmm4 | 
 | 	vaesenc	xmm11,xmm11,xmm4 | 
 | 	vaesenc	xmm12,xmm12,xmm4 | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[32+r8] | 
 | 	vaesenc	xmm7,xmm7,xmm4 | 
 | 	vaesenc	xmm8,xmm8,xmm4 | 
 | 	vaesenc	xmm9,xmm9,xmm4 | 
 | 	vaesenc	xmm10,xmm10,xmm4 | 
 | 	vaesenc	xmm11,xmm11,xmm4 | 
 | 	vaesenc	xmm12,xmm12,xmm4 | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[48+r8] | 
 | 	vaesenc	xmm7,xmm7,xmm4 | 
 | 	vaesenc	xmm8,xmm8,xmm4 | 
 | 	vaesenc	xmm9,xmm9,xmm4 | 
 | 	vaesenc	xmm10,xmm10,xmm4 | 
 | 	vaesenc	xmm11,xmm11,xmm4 | 
 | 	vaesenc	xmm12,xmm12,xmm4 | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[64+r8] | 
 | 	vaesenc	xmm7,xmm7,xmm4 | 
 | 	vaesenc	xmm8,xmm8,xmm4 | 
 | 	vaesenc	xmm9,xmm9,xmm4 | 
 | 	vaesenc	xmm10,xmm10,xmm4 | 
 | 	vaesenc	xmm11,xmm11,xmm4 | 
 | 	vaesenc	xmm12,xmm12,xmm4 | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[80+r8] | 
 | 	vaesenc	xmm7,xmm7,xmm4 | 
 | 	vaesenc	xmm8,xmm8,xmm4 | 
 | 	vaesenc	xmm9,xmm9,xmm4 | 
 | 	vaesenc	xmm10,xmm10,xmm4 | 
 | 	vaesenc	xmm11,xmm11,xmm4 | 
 | 	vaesenc	xmm12,xmm12,xmm4 | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[96+r8] | 
 | 	vaesenc	xmm7,xmm7,xmm4 | 
 | 	vaesenc	xmm8,xmm8,xmm4 | 
 | 	vaesenc	xmm9,xmm9,xmm4 | 
 | 	vaesenc	xmm10,xmm10,xmm4 | 
 | 	vaesenc	xmm11,xmm11,xmm4 | 
 | 	vaesenc	xmm12,xmm12,xmm4 | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[112+r8] | 
 | 	vaesenc	xmm7,xmm7,xmm4 | 
 | 	vaesenc	xmm8,xmm8,xmm4 | 
 | 	vaesenc	xmm9,xmm9,xmm4 | 
 | 	vaesenc	xmm10,xmm10,xmm4 | 
 | 	vaesenc	xmm11,xmm11,xmm4 | 
 | 	vaesenc	xmm12,xmm12,xmm4 | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[128+r8] | 
 | 	vaesenc	xmm7,xmm7,xmm4 | 
 | 	vaesenc	xmm8,xmm8,xmm4 | 
 | 	vaesenc	xmm9,xmm9,xmm4 | 
 | 	vaesenc	xmm10,xmm10,xmm4 | 
 | 	vaesenc	xmm11,xmm11,xmm4 | 
 | 	vaesenc	xmm12,xmm12,xmm4 | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[144+r8] | 
 | 	vaesenc	xmm7,xmm7,xmm4 | 
 | 	vaesenc	xmm8,xmm8,xmm4 | 
 | 	vaesenc	xmm9,xmm9,xmm4 | 
 | 	vaesenc	xmm10,xmm10,xmm4 | 
 | 	vaesenc	xmm11,xmm11,xmm4 | 
 | 	vaesenc	xmm12,xmm12,xmm4 | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[160+r8] | 
 | 	vaesenc	xmm7,xmm7,xmm4 | 
 | 	vaesenc	xmm8,xmm8,xmm4 | 
 | 	vaesenc	xmm9,xmm9,xmm4 | 
 | 	vaesenc	xmm10,xmm10,xmm4 | 
 | 	vaesenc	xmm11,xmm11,xmm4 | 
 | 	vaesenc	xmm12,xmm12,xmm4 | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[176+r8] | 
 | 	vaesenc	xmm7,xmm7,xmm4 | 
 | 	vaesenc	xmm8,xmm8,xmm4 | 
 | 	vaesenc	xmm9,xmm9,xmm4 | 
 | 	vaesenc	xmm10,xmm10,xmm4 | 
 | 	vaesenc	xmm11,xmm11,xmm4 | 
 | 	vaesenc	xmm12,xmm12,xmm4 | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[192+r8] | 
 | 	vaesenc	xmm7,xmm7,xmm4 | 
 | 	vaesenc	xmm8,xmm8,xmm4 | 
 | 	vaesenc	xmm9,xmm9,xmm4 | 
 | 	vaesenc	xmm10,xmm10,xmm4 | 
 | 	vaesenc	xmm11,xmm11,xmm4 | 
 | 	vaesenc	xmm12,xmm12,xmm4 | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[208+r8] | 
 | 	vaesenc	xmm7,xmm7,xmm4 | 
 | 	vaesenc	xmm8,xmm8,xmm4 | 
 | 	vaesenc	xmm9,xmm9,xmm4 | 
 | 	vaesenc	xmm10,xmm10,xmm4 | 
 | 	vaesenc	xmm11,xmm11,xmm4 | 
 | 	vaesenc	xmm12,xmm12,xmm4 | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[224+r8] | 
 | 	vaesenclast	xmm7,xmm7,xmm4 | 
 | 	vaesenclast	xmm8,xmm8,xmm4 | 
 | 	vaesenclast	xmm9,xmm9,xmm4 | 
 | 	vaesenclast	xmm10,xmm10,xmm4 | 
 | 	vaesenclast	xmm11,xmm11,xmm4 | 
 | 	vaesenclast	xmm12,xmm12,xmm4 | 
 |  | 
 |  | 
 | 	vpxor	xmm7,xmm7,XMMWORD[rdi] | 
 | 	vpxor	xmm8,xmm8,XMMWORD[16+rdi] | 
 | 	vpxor	xmm9,xmm9,XMMWORD[32+rdi] | 
 | 	vpxor	xmm10,xmm10,XMMWORD[48+rdi] | 
 | 	vpxor	xmm11,xmm11,XMMWORD[64+rdi] | 
 | 	vpxor	xmm12,xmm12,XMMWORD[80+rdi] | 
 |  | 
 | 	vmovdqu	XMMWORD[rsi],xmm7 | 
 | 	vmovdqu	XMMWORD[16+rsi],xmm8 | 
 | 	vmovdqu	XMMWORD[32+rsi],xmm9 | 
 | 	vmovdqu	XMMWORD[48+rsi],xmm10 | 
 | 	vmovdqu	XMMWORD[64+rsi],xmm11 | 
 | 	vmovdqu	XMMWORD[80+rsi],xmm12 | 
 |  | 
 | 	add	rdi,96 | 
 | 	add	rsi,96 | 
 | 	jmp	NEAR $L$256_dec_loop1 | 
 |  | 
 |  | 
 | ALIGN	64 | 
 | $L$256_dec_loop1: | 
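; Each pass: save the six plaintext blocks from the previous pass (one is
; kept in xmm6, five are parked via rax), start the next six counter blocks
; through the AES-256 rounds, and between rounds multiply the saved blocks
; by the matching hash-key powers from the table at rcx, accumulating the
; high/low/middle products in xmm2/xmm3/xmm1.  The final-round key is XORed
; with the ciphertext so vaesenclast both finishes the keystream and applies
; it, and the two vpalignr/vpclmulqdq-by-`poly` steps reduce the 256-bit
; product back to 128 bits.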
 | 	cmp	r9,96 | 
 | 	jb	NEAR $L$256_dec_finish_96 | 
 | 	sub	r9,96 | 
 |  | 
 | 	vmovdqa	xmm6,xmm12 | 
 | 	vmovdqa	XMMWORD[(16-32)+rax],xmm11 | 
 | 	vmovdqa	XMMWORD[(32-32)+rax],xmm10 | 
 | 	vmovdqa	XMMWORD[(48-32)+rax],xmm9 | 
 | 	vmovdqa	XMMWORD[(64-32)+rax],xmm8 | 
 | 	vmovdqa	XMMWORD[(80-32)+rax],xmm7 | 
 |  | 
 | 	vmovdqa	xmm7,xmm15 | 
 | 	vpaddd	xmm8,xmm7,XMMWORD[one] | 
 | 	vpaddd	xmm9,xmm7,XMMWORD[two] | 
 | 	vpaddd	xmm10,xmm9,XMMWORD[one] | 
 | 	vpaddd	xmm11,xmm9,XMMWORD[two] | 
 | 	vpaddd	xmm12,xmm11,XMMWORD[one] | 
 | 	vpaddd	xmm15,xmm11,XMMWORD[two] | 
 |  | 
 | 	vmovdqa	xmm4,XMMWORD[r8] | 
 | 	vpxor	xmm7,xmm7,xmm4 | 
 | 	vpxor	xmm8,xmm8,xmm4 | 
 | 	vpxor	xmm9,xmm9,xmm4 | 
 | 	vpxor	xmm10,xmm10,xmm4 | 
 | 	vpxor	xmm11,xmm11,xmm4 | 
 | 	vpxor	xmm12,xmm12,xmm4 | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[((0-32))+rcx] | 
 | 	vpclmulqdq	xmm2,xmm6,xmm4,0x11 | 
 | 	vpclmulqdq	xmm3,xmm6,xmm4,0x00 | 
 | 	vpclmulqdq	xmm1,xmm6,xmm4,0x01 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm4,0x10 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[16+r8] | 
 | 	vaesenc	xmm7,xmm7,xmm4 | 
 | 	vaesenc	xmm8,xmm8,xmm4 | 
 | 	vaesenc	xmm9,xmm9,xmm4 | 
 | 	vaesenc	xmm10,xmm10,xmm4 | 
 | 	vaesenc	xmm11,xmm11,xmm4 | 
 | 	vaesenc	xmm12,xmm12,xmm4 | 
 |  | 
 | 	vmovdqu	xmm6,XMMWORD[((-16))+rax] | 
 | 	vmovdqu	xmm13,XMMWORD[((-16))+rcx] | 
 |  | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x10 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x11 | 
 | 	vpxor	xmm2,xmm2,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x00 | 
 | 	vpxor	xmm3,xmm3,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x01 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 |  | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[32+r8] | 
 | 	vaesenc	xmm7,xmm7,xmm4 | 
 | 	vaesenc	xmm8,xmm8,xmm4 | 
 | 	vaesenc	xmm9,xmm9,xmm4 | 
 | 	vaesenc	xmm10,xmm10,xmm4 | 
 | 	vaesenc	xmm11,xmm11,xmm4 | 
 | 	vaesenc	xmm12,xmm12,xmm4 | 
 |  | 
 | 	vmovdqu	xmm6,XMMWORD[rax] | 
 | 	vmovdqu	xmm13,XMMWORD[rcx] | 
 |  | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x10 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x11 | 
 | 	vpxor	xmm2,xmm2,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x00 | 
 | 	vpxor	xmm3,xmm3,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x01 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 |  | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[48+r8] | 
 | 	vaesenc	xmm7,xmm7,xmm4 | 
 | 	vaesenc	xmm8,xmm8,xmm4 | 
 | 	vaesenc	xmm9,xmm9,xmm4 | 
 | 	vaesenc	xmm10,xmm10,xmm4 | 
 | 	vaesenc	xmm11,xmm11,xmm4 | 
 | 	vaesenc	xmm12,xmm12,xmm4 | 
 |  | 
 | 	vmovdqu	xmm6,XMMWORD[16+rax] | 
 | 	vmovdqu	xmm13,XMMWORD[16+rcx] | 
 |  | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x10 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x11 | 
 | 	vpxor	xmm2,xmm2,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x00 | 
 | 	vpxor	xmm3,xmm3,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x01 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 |  | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[64+r8] | 
 | 	vaesenc	xmm7,xmm7,xmm4 | 
 | 	vaesenc	xmm8,xmm8,xmm4 | 
 | 	vaesenc	xmm9,xmm9,xmm4 | 
 | 	vaesenc	xmm10,xmm10,xmm4 | 
 | 	vaesenc	xmm11,xmm11,xmm4 | 
 | 	vaesenc	xmm12,xmm12,xmm4 | 
 |  | 
 | 	vmovdqu	xmm6,XMMWORD[32+rax] | 
 | 	vmovdqu	xmm13,XMMWORD[32+rcx] | 
 |  | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x10 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x11 | 
 | 	vpxor	xmm2,xmm2,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x00 | 
 | 	vpxor	xmm3,xmm3,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x01 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 |  | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[80+r8] | 
 | 	vaesenc	xmm7,xmm7,xmm4 | 
 | 	vaesenc	xmm8,xmm8,xmm4 | 
 | 	vaesenc	xmm9,xmm9,xmm4 | 
 | 	vaesenc	xmm10,xmm10,xmm4 | 
 | 	vaesenc	xmm11,xmm11,xmm4 | 
 | 	vaesenc	xmm12,xmm12,xmm4 | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[96+r8] | 
 | 	vaesenc	xmm7,xmm7,xmm4 | 
 | 	vaesenc	xmm8,xmm8,xmm4 | 
 | 	vaesenc	xmm9,xmm9,xmm4 | 
 | 	vaesenc	xmm10,xmm10,xmm4 | 
 | 	vaesenc	xmm11,xmm11,xmm4 | 
 | 	vaesenc	xmm12,xmm12,xmm4 | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[112+r8] | 
 | 	vaesenc	xmm7,xmm7,xmm4 | 
 | 	vaesenc	xmm8,xmm8,xmm4 | 
 | 	vaesenc	xmm9,xmm9,xmm4 | 
 | 	vaesenc	xmm10,xmm10,xmm4 | 
 | 	vaesenc	xmm11,xmm11,xmm4 | 
 | 	vaesenc	xmm12,xmm12,xmm4 | 
 |  | 
 |  | 
 | 	vmovdqa	xmm6,XMMWORD[((80-32))+rax] | 
 | 	vpxor	xmm6,xmm6,xmm0 | 
 | 	vmovdqu	xmm5,XMMWORD[((80-32))+rcx] | 
 |  | 
 | 	vpclmulqdq	xmm4,xmm6,xmm5,0x01 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm5,0x11 | 
 | 	vpxor	xmm2,xmm2,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm5,0x00 | 
 | 	vpxor	xmm3,xmm3,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm5,0x10 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[128+r8] | 
 | 	vaesenc	xmm7,xmm7,xmm4 | 
 | 	vaesenc	xmm8,xmm8,xmm4 | 
 | 	vaesenc	xmm9,xmm9,xmm4 | 
 | 	vaesenc	xmm10,xmm10,xmm4 | 
 | 	vaesenc	xmm11,xmm11,xmm4 | 
 | 	vaesenc	xmm12,xmm12,xmm4 | 
 |  | 
 |  | 
 | 	vpsrldq	xmm4,xmm1,8 | 
 | 	vpxor	xmm5,xmm2,xmm4 | 
 | 	vpslldq	xmm4,xmm1,8 | 
 | 	vpxor	xmm0,xmm3,xmm4 | 
 |  | 
 | 	vmovdqa	xmm3,XMMWORD[poly] | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[144+r8] | 
 | 	vaesenc	xmm7,xmm7,xmm4 | 
 | 	vaesenc	xmm8,xmm8,xmm4 | 
 | 	vaesenc	xmm9,xmm9,xmm4 | 
 | 	vaesenc	xmm10,xmm10,xmm4 | 
 | 	vaesenc	xmm11,xmm11,xmm4 | 
 | 	vaesenc	xmm12,xmm12,xmm4 | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[160+r8] | 
 | 	vaesenc	xmm7,xmm7,xmm4 | 
 | 	vaesenc	xmm8,xmm8,xmm4 | 
 | 	vaesenc	xmm9,xmm9,xmm4 | 
 | 	vaesenc	xmm10,xmm10,xmm4 | 
 | 	vaesenc	xmm11,xmm11,xmm4 | 
 | 	vaesenc	xmm12,xmm12,xmm4 | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[176+r8] | 
 | 	vaesenc	xmm7,xmm7,xmm4 | 
 | 	vaesenc	xmm8,xmm8,xmm4 | 
 | 	vaesenc	xmm9,xmm9,xmm4 | 
 | 	vaesenc	xmm10,xmm10,xmm4 | 
 | 	vaesenc	xmm11,xmm11,xmm4 | 
 | 	vaesenc	xmm12,xmm12,xmm4 | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[192+r8] | 
 | 	vaesenc	xmm7,xmm7,xmm4 | 
 | 	vaesenc	xmm8,xmm8,xmm4 | 
 | 	vaesenc	xmm9,xmm9,xmm4 | 
 | 	vaesenc	xmm10,xmm10,xmm4 | 
 | 	vaesenc	xmm11,xmm11,xmm4 | 
 | 	vaesenc	xmm12,xmm12,xmm4 | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[208+r8] | 
 | 	vaesenc	xmm7,xmm7,xmm4 | 
 | 	vaesenc	xmm8,xmm8,xmm4 | 
 | 	vaesenc	xmm9,xmm9,xmm4 | 
 | 	vaesenc	xmm10,xmm10,xmm4 | 
 | 	vaesenc	xmm11,xmm11,xmm4 | 
 | 	vaesenc	xmm12,xmm12,xmm4 | 
 |  | 
 | 	vmovdqu	xmm6,XMMWORD[224+r8] | 
 | 	vpalignr	xmm2,xmm0,xmm0,8 | 
 | 	vpclmulqdq	xmm0,xmm0,xmm3,0x10 | 
 | 	vpxor	xmm0,xmm2,xmm0 | 
 |  | 
 | 	vpxor	xmm4,xmm6,XMMWORD[rdi] | 
 | 	vaesenclast	xmm7,xmm7,xmm4 | 
 | 	vpxor	xmm4,xmm6,XMMWORD[16+rdi] | 
 | 	vaesenclast	xmm8,xmm8,xmm4 | 
 | 	vpxor	xmm4,xmm6,XMMWORD[32+rdi] | 
 | 	vaesenclast	xmm9,xmm9,xmm4 | 
 | 	vpxor	xmm4,xmm6,XMMWORD[48+rdi] | 
 | 	vaesenclast	xmm10,xmm10,xmm4 | 
 | 	vpxor	xmm4,xmm6,XMMWORD[64+rdi] | 
 | 	vaesenclast	xmm11,xmm11,xmm4 | 
 | 	vpxor	xmm4,xmm6,XMMWORD[80+rdi] | 
 | 	vaesenclast	xmm12,xmm12,xmm4 | 
 |  | 
 | 	vpalignr	xmm2,xmm0,xmm0,8 | 
 | 	vpclmulqdq	xmm0,xmm0,xmm3,0x10 | 
 | 	vpxor	xmm0,xmm2,xmm0 | 
 |  | 
 | 	vmovdqu	XMMWORD[rsi],xmm7 | 
 | 	vmovdqu	XMMWORD[16+rsi],xmm8 | 
 | 	vmovdqu	XMMWORD[32+rsi],xmm9 | 
 | 	vmovdqu	XMMWORD[48+rsi],xmm10 | 
 | 	vmovdqu	XMMWORD[64+rsi],xmm11 | 
 | 	vmovdqu	XMMWORD[80+rsi],xmm12 | 
 |  | 
 | 	vpxor	xmm0,xmm0,xmm5 | 
 |  | 
 | 	lea	rdi,[96+rdi] | 
 | 	lea	rsi,[96+rsi] | 
 | 	jmp	NEAR $L$256_dec_loop1 | 
 |  | 
 | $L$256_dec_finish_96: | 
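; No more input for the wide path: hash the six plaintext blocks from the
; final pass, fold them into xmm0, and finish with the two-step reduction
; modulo the POLYVAL polynomial (the `poly` constant).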
 | 	vmovdqa	xmm6,xmm12 | 
 | 	vmovdqa	XMMWORD[(16-32)+rax],xmm11 | 
 | 	vmovdqa	XMMWORD[(32-32)+rax],xmm10 | 
 | 	vmovdqa	XMMWORD[(48-32)+rax],xmm9 | 
 | 	vmovdqa	XMMWORD[(64-32)+rax],xmm8 | 
 | 	vmovdqa	XMMWORD[(80-32)+rax],xmm7 | 
 |  | 
 | 	vmovdqu	xmm4,XMMWORD[((0-32))+rcx] | 
 | 	vpclmulqdq	xmm1,xmm6,xmm4,0x10 | 
 | 	vpclmulqdq	xmm2,xmm6,xmm4,0x11 | 
 | 	vpclmulqdq	xmm3,xmm6,xmm4,0x00 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm4,0x01 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 |  | 
 | 	vmovdqu	xmm6,XMMWORD[((-16))+rax] | 
 | 	vmovdqu	xmm13,XMMWORD[((-16))+rcx] | 
 |  | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x10 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x11 | 
 | 	vpxor	xmm2,xmm2,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x00 | 
 | 	vpxor	xmm3,xmm3,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x01 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 |  | 
 | 	vmovdqu	xmm6,XMMWORD[rax] | 
 | 	vmovdqu	xmm13,XMMWORD[rcx] | 
 |  | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x10 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x11 | 
 | 	vpxor	xmm2,xmm2,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x00 | 
 | 	vpxor	xmm3,xmm3,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x01 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 |  | 
 | 	vmovdqu	xmm6,XMMWORD[16+rax] | 
 | 	vmovdqu	xmm13,XMMWORD[16+rcx] | 
 |  | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x10 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x11 | 
 | 	vpxor	xmm2,xmm2,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x00 | 
 | 	vpxor	xmm3,xmm3,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x01 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 |  | 
 | 	vmovdqu	xmm6,XMMWORD[32+rax] | 
 | 	vmovdqu	xmm13,XMMWORD[32+rcx] | 
 |  | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x10 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x11 | 
 | 	vpxor	xmm2,xmm2,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x00 | 
 | 	vpxor	xmm3,xmm3,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm13,0x01 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 |  | 
 |  | 
 | 	vmovdqu	xmm6,XMMWORD[((80-32))+rax] | 
 | 	vpxor	xmm6,xmm6,xmm0 | 
 | 	vmovdqu	xmm5,XMMWORD[((80-32))+rcx] | 
 | 	vpclmulqdq	xmm4,xmm6,xmm5,0x11 | 
 | 	vpxor	xmm2,xmm2,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm5,0x00 | 
 | 	vpxor	xmm3,xmm3,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm5,0x10 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 | 	vpclmulqdq	xmm4,xmm6,xmm5,0x01 | 
 | 	vpxor	xmm1,xmm1,xmm4 | 
 |  | 
 | 	vpsrldq	xmm4,xmm1,8 | 
 | 	vpxor	xmm5,xmm2,xmm4 | 
 | 	vpslldq	xmm4,xmm1,8 | 
 | 	vpxor	xmm0,xmm3,xmm4 | 
 |  | 
 | 	vmovdqa	xmm3,XMMWORD[poly] | 
 |  | 
 | 	vpalignr	xmm2,xmm0,xmm0,8 | 
 | 	vpclmulqdq	xmm0,xmm0,xmm3,0x10 | 
 | 	vpxor	xmm0,xmm2,xmm0 | 
 |  | 
 | 	vpalignr	xmm2,xmm0,xmm0,8 | 
 | 	vpclmulqdq	xmm0,xmm0,xmm3,0x10 | 
 | 	vpxor	xmm0,xmm2,xmm0 | 
 |  | 
 | 	vpxor	xmm0,xmm0,xmm5 | 
 |  | 
 | $L$256_dec_loop2: | 
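; Remaining whole blocks, one at a time: generate a keystream block, XOR it
; with the ciphertext, then fold the plaintext block into the accumulator
; via GFMUL using the first hash-key table entry at [-32+rcx].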
 |  | 
 |  | 
 |  | 
 | 	cmp	r9,16 | 
 | 	jb	NEAR $L$256_dec_out | 
 | 	sub	r9,16 | 
 |  | 
 | 	vmovdqa	xmm2,xmm15 | 
 | 	vpaddd	xmm15,xmm15,XMMWORD[one] | 
 |  | 
 | 	vpxor	xmm2,xmm2,XMMWORD[r8] | 
 | 	vaesenc	xmm2,xmm2,XMMWORD[16+r8] | 
 | 	vaesenc	xmm2,xmm2,XMMWORD[32+r8] | 
 | 	vaesenc	xmm2,xmm2,XMMWORD[48+r8] | 
 | 	vaesenc	xmm2,xmm2,XMMWORD[64+r8] | 
 | 	vaesenc	xmm2,xmm2,XMMWORD[80+r8] | 
 | 	vaesenc	xmm2,xmm2,XMMWORD[96+r8] | 
 | 	vaesenc	xmm2,xmm2,XMMWORD[112+r8] | 
 | 	vaesenc	xmm2,xmm2,XMMWORD[128+r8] | 
 | 	vaesenc	xmm2,xmm2,XMMWORD[144+r8] | 
 | 	vaesenc	xmm2,xmm2,XMMWORD[160+r8] | 
 | 	vaesenc	xmm2,xmm2,XMMWORD[176+r8] | 
 | 	vaesenc	xmm2,xmm2,XMMWORD[192+r8] | 
 | 	vaesenc	xmm2,xmm2,XMMWORD[208+r8] | 
 | 	vaesenclast	xmm2,xmm2,XMMWORD[224+r8] | 
 | 	vpxor	xmm2,xmm2,XMMWORD[rdi] | 
 | 	vmovdqu	XMMWORD[rsi],xmm2 | 
 | 	add	rdi,16 | 
 | 	add	rsi,16 | 
 |  | 
 | 	vpxor	xmm0,xmm0,xmm2 | 
 | 	vmovdqa	xmm1,XMMWORD[((-32))+rcx] | 
 | 	call	GFMUL | 
 |  | 
 | 	jmp	NEAR $L$256_dec_loop2 | 
 |  | 
 | $L$256_dec_out: | 
 | 	vmovdqu	XMMWORD[rdx],xmm0 | 
 | 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue | 
 | 	mov	rsi,QWORD[16+rsp] | 
 | 	ret | 
 |  | 
 | $L$SEH_end_aes256gcmsiv_dec: | 
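; aes256gcmsiv_kdf: AES-GCM-SIV key derivation.  Register roles after the
; Win64 shim: rdi = nonce (read as one aligned 16-byte block), rsi = 96-byte
; output (written with aligned stores), rdx = expanded AES-256 key.
; Six blocks are formed from the nonce with a 32-bit counter field running
; 0..5 (and_mask clears the counter lane, `one` increments it), encrypted in
; parallel, and written out; the caller assembles the derived authentication
; and encryption keys from these outputs.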
 | global	aes256gcmsiv_kdf | 
 |  | 
 | ALIGN	16 | 
 | aes256gcmsiv_kdf: | 
 | 	mov	QWORD[8+rsp],rdi	;WIN64 prologue | 
 | 	mov	QWORD[16+rsp],rsi | 
 | 	mov	rax,rsp | 
 | $L$SEH_begin_aes256gcmsiv_kdf: | 
 | 	mov	rdi,rcx | 
 | 	mov	rsi,rdx | 
 | 	mov	rdx,r8 | 
 |  | 
 |  | 
 |  | 
 | _CET_ENDBR | 
 |  | 
 |  | 
 |  | 
 |  | 
 | 	vmovdqa	xmm1,XMMWORD[rdx] | 
 | 	vmovdqa	xmm4,XMMWORD[rdi] | 
 | 	vmovdqa	xmm11,XMMWORD[and_mask] | 
 | 	vmovdqa	xmm8,XMMWORD[one] | 
 | 	vpshufd	xmm4,xmm4,0x90 | 
 | 	vpand	xmm4,xmm4,xmm11 | 
 | 	vpaddd	xmm6,xmm4,xmm8 | 
 | 	vpaddd	xmm7,xmm6,xmm8 | 
 | 	vpaddd	xmm11,xmm7,xmm8 | 
 | 	vpaddd	xmm12,xmm11,xmm8 | 
 | 	vpaddd	xmm13,xmm12,xmm8 | 
 |  | 
 | 	vpxor	xmm4,xmm4,xmm1 | 
 | 	vpxor	xmm6,xmm6,xmm1 | 
 | 	vpxor	xmm7,xmm7,xmm1 | 
 | 	vpxor	xmm11,xmm11,xmm1 | 
 | 	vpxor	xmm12,xmm12,xmm1 | 
 | 	vpxor	xmm13,xmm13,xmm1 | 
 |  | 
 | 	vmovdqa	xmm1,XMMWORD[16+rdx] | 
 | 	vaesenc	xmm4,xmm4,xmm1 | 
 | 	vaesenc	xmm6,xmm6,xmm1 | 
 | 	vaesenc	xmm7,xmm7,xmm1 | 
 | 	vaesenc	xmm11,xmm11,xmm1 | 
 | 	vaesenc	xmm12,xmm12,xmm1 | 
 | 	vaesenc	xmm13,xmm13,xmm1 | 
 |  | 
 | 	vmovdqa	xmm2,XMMWORD[32+rdx] | 
 | 	vaesenc	xmm4,xmm4,xmm2 | 
 | 	vaesenc	xmm6,xmm6,xmm2 | 
 | 	vaesenc	xmm7,xmm7,xmm2 | 
 | 	vaesenc	xmm11,xmm11,xmm2 | 
 | 	vaesenc	xmm12,xmm12,xmm2 | 
 | 	vaesenc	xmm13,xmm13,xmm2 | 
 |  | 
 | 	vmovdqa	xmm1,XMMWORD[48+rdx] | 
 | 	vaesenc	xmm4,xmm4,xmm1 | 
 | 	vaesenc	xmm6,xmm6,xmm1 | 
 | 	vaesenc	xmm7,xmm7,xmm1 | 
 | 	vaesenc	xmm11,xmm11,xmm1 | 
 | 	vaesenc	xmm12,xmm12,xmm1 | 
 | 	vaesenc	xmm13,xmm13,xmm1 | 
 |  | 
 | 	vmovdqa	xmm2,XMMWORD[64+rdx] | 
 | 	vaesenc	xmm4,xmm4,xmm2 | 
 | 	vaesenc	xmm6,xmm6,xmm2 | 
 | 	vaesenc	xmm7,xmm7,xmm2 | 
 | 	vaesenc	xmm11,xmm11,xmm2 | 
 | 	vaesenc	xmm12,xmm12,xmm2 | 
 | 	vaesenc	xmm13,xmm13,xmm2 | 
 |  | 
 | 	vmovdqa	xmm1,XMMWORD[80+rdx] | 
 | 	vaesenc	xmm4,xmm4,xmm1 | 
 | 	vaesenc	xmm6,xmm6,xmm1 | 
 | 	vaesenc	xmm7,xmm7,xmm1 | 
 | 	vaesenc	xmm11,xmm11,xmm1 | 
 | 	vaesenc	xmm12,xmm12,xmm1 | 
 | 	vaesenc	xmm13,xmm13,xmm1 | 
 |  | 
 | 	vmovdqa	xmm2,XMMWORD[96+rdx] | 
 | 	vaesenc	xmm4,xmm4,xmm2 | 
 | 	vaesenc	xmm6,xmm6,xmm2 | 
 | 	vaesenc	xmm7,xmm7,xmm2 | 
 | 	vaesenc	xmm11,xmm11,xmm2 | 
 | 	vaesenc	xmm12,xmm12,xmm2 | 
 | 	vaesenc	xmm13,xmm13,xmm2 | 
 |  | 
 | 	vmovdqa	xmm1,XMMWORD[112+rdx] | 
 | 	vaesenc	xmm4,xmm4,xmm1 | 
 | 	vaesenc	xmm6,xmm6,xmm1 | 
 | 	vaesenc	xmm7,xmm7,xmm1 | 
 | 	vaesenc	xmm11,xmm11,xmm1 | 
 | 	vaesenc	xmm12,xmm12,xmm1 | 
 | 	vaesenc	xmm13,xmm13,xmm1 | 
 |  | 
 | 	vmovdqa	xmm2,XMMWORD[128+rdx] | 
 | 	vaesenc	xmm4,xmm4,xmm2 | 
 | 	vaesenc	xmm6,xmm6,xmm2 | 
 | 	vaesenc	xmm7,xmm7,xmm2 | 
 | 	vaesenc	xmm11,xmm11,xmm2 | 
 | 	vaesenc	xmm12,xmm12,xmm2 | 
 | 	vaesenc	xmm13,xmm13,xmm2 | 
 |  | 
 | 	vmovdqa	xmm1,XMMWORD[144+rdx] | 
 | 	vaesenc	xmm4,xmm4,xmm1 | 
 | 	vaesenc	xmm6,xmm6,xmm1 | 
 | 	vaesenc	xmm7,xmm7,xmm1 | 
 | 	vaesenc	xmm11,xmm11,xmm1 | 
 | 	vaesenc	xmm12,xmm12,xmm1 | 
 | 	vaesenc	xmm13,xmm13,xmm1 | 
 |  | 
 | 	vmovdqa	xmm2,XMMWORD[160+rdx] | 
 | 	vaesenc	xmm4,xmm4,xmm2 | 
 | 	vaesenc	xmm6,xmm6,xmm2 | 
 | 	vaesenc	xmm7,xmm7,xmm2 | 
 | 	vaesenc	xmm11,xmm11,xmm2 | 
 | 	vaesenc	xmm12,xmm12,xmm2 | 
 | 	vaesenc	xmm13,xmm13,xmm2 | 
 |  | 
 | 	vmovdqa	xmm1,XMMWORD[176+rdx] | 
 | 	vaesenc	xmm4,xmm4,xmm1 | 
 | 	vaesenc	xmm6,xmm6,xmm1 | 
 | 	vaesenc	xmm7,xmm7,xmm1 | 
 | 	vaesenc	xmm11,xmm11,xmm1 | 
 | 	vaesenc	xmm12,xmm12,xmm1 | 
 | 	vaesenc	xmm13,xmm13,xmm1 | 
 |  | 
 | 	vmovdqa	xmm2,XMMWORD[192+rdx] | 
 | 	vaesenc	xmm4,xmm4,xmm2 | 
 | 	vaesenc	xmm6,xmm6,xmm2 | 
 | 	vaesenc	xmm7,xmm7,xmm2 | 
 | 	vaesenc	xmm11,xmm11,xmm2 | 
 | 	vaesenc	xmm12,xmm12,xmm2 | 
 | 	vaesenc	xmm13,xmm13,xmm2 | 
 |  | 
 | 	vmovdqa	xmm1,XMMWORD[208+rdx] | 
 | 	vaesenc	xmm4,xmm4,xmm1 | 
 | 	vaesenc	xmm6,xmm6,xmm1 | 
 | 	vaesenc	xmm7,xmm7,xmm1 | 
 | 	vaesenc	xmm11,xmm11,xmm1 | 
 | 	vaesenc	xmm12,xmm12,xmm1 | 
 | 	vaesenc	xmm13,xmm13,xmm1 | 
 |  | 
 | 	vmovdqa	xmm2,XMMWORD[224+rdx] | 
 | 	vaesenclast	xmm4,xmm4,xmm2 | 
 | 	vaesenclast	xmm6,xmm6,xmm2 | 
 | 	vaesenclast	xmm7,xmm7,xmm2 | 
 | 	vaesenclast	xmm11,xmm11,xmm2 | 
 | 	vaesenclast	xmm12,xmm12,xmm2 | 
 | 	vaesenclast	xmm13,xmm13,xmm2 | 
 |  | 
 |  | 
 | 	vmovdqa	XMMWORD[rsi],xmm4 | 
 | 	vmovdqa	XMMWORD[16+rsi],xmm6 | 
 | 	vmovdqa	XMMWORD[32+rsi],xmm7 | 
 | 	vmovdqa	XMMWORD[48+rsi],xmm11 | 
 | 	vmovdqa	XMMWORD[64+rsi],xmm12 | 
 | 	vmovdqa	XMMWORD[80+rsi],xmm13 | 
 | 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue | 
 | 	mov	rsi,QWORD[16+rsp] | 
 | 	ret | 
 |  | 
 | $L$SEH_end_aes256gcmsiv_kdf: | 
 | %else | 
 | ; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738 | 
 | ret | 
 | %endif |