| ; This file is generated from a similarly-named Perl script in the BoringSSL |
| ; source tree. Do not edit by hand. |
| |
| %ifidn __OUTPUT_FORMAT__, win64 |
| default rel |
| %define XMMWORD |
| %define YMMWORD |
| %define ZMMWORD |
| %define _CET_ENDBR |
| |
| %ifdef BORINGSSL_PREFIX |
| %include "boringssl_prefix_symbols_nasm.inc" |
| %endif |
| section .rdata rdata align=8 |
| ALIGN 64 |
| chacha20_poly1305_constants: |
| $L$chacha20_consts: |
| DB 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k' |
| DB 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k' |
| $L$rol8: |
| DB 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14 |
| DB 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14 |
| $L$rol16: |
| DB 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13 |
| DB 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13 |
| $L$avx2_init: |
| DD 0,0,0,0 |
| $L$sse_inc: |
| DD 1,0,0,0 |
| $L$avx2_inc: |
| DD 2,0,0,0,2,0,0,0 |
| $L$clamp: |
| DQ 0x0FFFFFFC0FFFFFFF,0x0FFFFFFC0FFFFFFC |
| DQ 0xFFFFFFFFFFFFFFFF,0xFFFFFFFFFFFFFFFF |
| ALIGN 16 |
| $L$and_masks: |
| DB 0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 |
| DB 0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 |
| DB 0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 |
| DB 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 |
| DB 0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 |
| DB 0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 |
| DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 |
| DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 |
| DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00 |
| DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00 |
| DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00 |
| DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00 |
| DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00 |
| DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00 |
| DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00 |
| DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff |
| section .text code align=64 |
| |
| |
| |
| ALIGN 64 |
| poly_hash_ad_internal: |
| |
| |
| xor r10,r10 |
| xor r11,r11 |
| xor r12,r12 |
| cmp r8,13 |
| jne NEAR $L$hash_ad_loop |
| $L$poly_fast_tls_ad: |
| |
| mov r10,QWORD[rcx] |
| mov r11,QWORD[5+rcx] |
| shr r11,24 |
| mov r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| ret |
| $L$hash_ad_loop: |
| |
| cmp r8,16 |
| jb NEAR $L$hash_ad_tail |
| add r10,QWORD[((0+0))+rcx] |
| adc r11,QWORD[((8+0))+rcx] |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| lea rcx,[16+rcx] |
| sub r8,16 |
| jmp NEAR $L$hash_ad_loop |
| $L$hash_ad_tail: |
| cmp r8,0 |
| je NEAR $L$hash_ad_done |
| |
| xor r13,r13 |
| xor r14,r14 |
| xor r15,r15 |
| add rcx,r8 |
| $L$hash_ad_tail_loop: |
| shld r14,r13,8 |
| shl r13,8 |
| movzx r15,BYTE[((-1))+rcx] |
| xor r13,r15 |
| dec rcx |
| dec r8 |
| jne NEAR $L$hash_ad_tail_loop |
| |
| add r10,r13 |
| adc r11,r14 |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| |
| $L$hash_ad_done: |
| ret |
| |
| |
| |
| global chacha20_poly1305_open_nohw |
| |
| ALIGN 64 |
| chacha20_poly1305_open_nohw: |
| mov QWORD[8+rsp],rdi ;WIN64 prologue |
| mov QWORD[16+rsp],rsi |
| mov rax,rsp |
| $L$SEH_begin_chacha20_poly1305_open_nohw: |
| mov rdi,rcx |
| mov rsi,rdx |
| mov rdx,r8 |
| mov rcx,r9 |
| mov r8,QWORD[40+rsp] |
| mov r9,QWORD[48+rsp] |
| |
| |
| |
| _CET_ENDBR |
| push rbp |
| |
| push rbx |
| |
| push r12 |
| |
| push r13 |
| |
| push r14 |
| |
| push r15 |
| |
| |
| |
| push r9 |
| |
| sub rsp,288 + 160 + 32 |
| |
| |
| lea rbp,[32+rsp] |
| and rbp,-32 |
| |
| movaps XMMWORD[(0+0)+rbp],xmm6 |
| movaps XMMWORD[(16+0)+rbp],xmm7 |
| movaps XMMWORD[(32+0)+rbp],xmm8 |
| movaps XMMWORD[(48+0)+rbp],xmm9 |
| movaps XMMWORD[(64+0)+rbp],xmm10 |
| movaps XMMWORD[(80+0)+rbp],xmm11 |
| movaps XMMWORD[(96+0)+rbp],xmm12 |
| movaps XMMWORD[(112+0)+rbp],xmm13 |
| movaps XMMWORD[(128+0)+rbp],xmm14 |
| movaps XMMWORD[(144+0)+rbp],xmm15 |
| |
| mov rbx,rdx |
| mov QWORD[((0+160+32))+rbp],r8 |
| mov QWORD[((8+160+32))+rbp],rbx |
| |
| cmp rbx,128 |
| jbe NEAR $L$open_sse_128 |
| |
| movdqa xmm0,XMMWORD[$L$chacha20_consts] |
| movdqu xmm4,XMMWORD[r9] |
| movdqu xmm8,XMMWORD[16+r9] |
| movdqu xmm12,XMMWORD[32+r9] |
| |
| movdqa xmm7,xmm12 |
| |
| movdqa XMMWORD[(160+48)+rbp],xmm4 |
| movdqa XMMWORD[(160+64)+rbp],xmm8 |
| movdqa XMMWORD[(160+96)+rbp],xmm12 |
| mov r10,10 |
| $L$open_sse_init_rounds: |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol16] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,12 |
| psrld xmm4,20 |
| pxor xmm4,xmm3 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol8] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,7 |
| psrld xmm4,25 |
| pxor xmm4,xmm3 |
| DB 102,15,58,15,228,4 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,12 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol16] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,12 |
| psrld xmm4,20 |
| pxor xmm4,xmm3 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol8] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,7 |
| psrld xmm4,25 |
| pxor xmm4,xmm3 |
| DB 102,15,58,15,228,12 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,4 |
| |
| dec r10 |
| jne NEAR $L$open_sse_init_rounds |
| |
| paddd xmm0,XMMWORD[$L$chacha20_consts] |
| paddd xmm4,XMMWORD[((160+48))+rbp] |
| |
| pand xmm0,XMMWORD[$L$clamp] |
| movdqa XMMWORD[(160+0)+rbp],xmm0 |
| movdqa XMMWORD[(160+16)+rbp],xmm4 |
| |
| mov r8,r8 |
| call poly_hash_ad_internal |
| $L$open_sse_main_loop: |
| cmp rbx,16*16 |
| jb NEAR $L$open_sse_tail |
| |
| movdqa xmm0,XMMWORD[$L$chacha20_consts] |
| movdqa xmm4,XMMWORD[((160+48))+rbp] |
| movdqa xmm8,XMMWORD[((160+64))+rbp] |
| movdqa xmm1,xmm0 |
| movdqa xmm5,xmm4 |
| movdqa xmm9,xmm8 |
| movdqa xmm2,xmm0 |
| movdqa xmm6,xmm4 |
| movdqa xmm10,xmm8 |
| movdqa xmm3,xmm0 |
| movdqa xmm7,xmm4 |
| movdqa xmm11,xmm8 |
| movdqa xmm15,XMMWORD[((160+96))+rbp] |
| paddd xmm15,XMMWORD[$L$sse_inc] |
| movdqa xmm14,xmm15 |
| paddd xmm14,XMMWORD[$L$sse_inc] |
| movdqa xmm13,xmm14 |
| paddd xmm13,XMMWORD[$L$sse_inc] |
| movdqa xmm12,xmm13 |
| paddd xmm12,XMMWORD[$L$sse_inc] |
| movdqa XMMWORD[(160+96)+rbp],xmm12 |
| movdqa XMMWORD[(160+112)+rbp],xmm13 |
| movdqa XMMWORD[(160+128)+rbp],xmm14 |
| movdqa XMMWORD[(160+144)+rbp],xmm15 |
| |
| |
| |
| mov rcx,4 |
| mov r8,rsi |
| $L$open_sse_main_loop_rounds: |
| movdqa XMMWORD[(160+80)+rbp],xmm8 |
| movdqa xmm8,XMMWORD[$L$rol16] |
| paddd xmm3,xmm7 |
| paddd xmm2,xmm6 |
| paddd xmm1,xmm5 |
| paddd xmm0,xmm4 |
| pxor xmm15,xmm3 |
| pxor xmm14,xmm2 |
| pxor xmm13,xmm1 |
| pxor xmm12,xmm0 |
| DB 102,69,15,56,0,248 |
| DB 102,69,15,56,0,240 |
| DB 102,69,15,56,0,232 |
| DB 102,69,15,56,0,224 |
| movdqa xmm8,XMMWORD[((160+80))+rbp] |
| paddd xmm11,xmm15 |
| paddd xmm10,xmm14 |
| paddd xmm9,xmm13 |
| paddd xmm8,xmm12 |
| pxor xmm7,xmm11 |
| add r10,QWORD[((0+0))+r8] |
| adc r11,QWORD[((8+0))+r8] |
| adc r12,1 |
| |
| lea r8,[16+r8] |
| pxor xmm6,xmm10 |
| pxor xmm5,xmm9 |
| pxor xmm4,xmm8 |
| movdqa XMMWORD[(160+80)+rbp],xmm8 |
| movdqa xmm8,xmm7 |
| psrld xmm8,20 |
| pslld xmm7,32-20 |
| pxor xmm7,xmm8 |
| movdqa xmm8,xmm6 |
| psrld xmm8,20 |
| pslld xmm6,32-20 |
| pxor xmm6,xmm8 |
| movdqa xmm8,xmm5 |
| psrld xmm8,20 |
| pslld xmm5,32-20 |
| pxor xmm5,xmm8 |
| movdqa xmm8,xmm4 |
| psrld xmm8,20 |
| pslld xmm4,32-20 |
| pxor xmm4,xmm8 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| movdqa xmm8,XMMWORD[$L$rol8] |
| paddd xmm3,xmm7 |
| paddd xmm2,xmm6 |
| paddd xmm1,xmm5 |
| paddd xmm0,xmm4 |
| pxor xmm15,xmm3 |
| pxor xmm14,xmm2 |
| pxor xmm13,xmm1 |
| pxor xmm12,xmm0 |
| DB 102,69,15,56,0,248 |
| DB 102,69,15,56,0,240 |
| DB 102,69,15,56,0,232 |
| DB 102,69,15,56,0,224 |
| movdqa xmm8,XMMWORD[((160+80))+rbp] |
| paddd xmm11,xmm15 |
| paddd xmm10,xmm14 |
| paddd xmm9,xmm13 |
| paddd xmm8,xmm12 |
| pxor xmm7,xmm11 |
| pxor xmm6,xmm10 |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| pxor xmm5,xmm9 |
| pxor xmm4,xmm8 |
| movdqa XMMWORD[(160+80)+rbp],xmm8 |
| movdqa xmm8,xmm7 |
| psrld xmm8,25 |
| pslld xmm7,32-25 |
| pxor xmm7,xmm8 |
| movdqa xmm8,xmm6 |
| psrld xmm8,25 |
| pslld xmm6,32-25 |
| pxor xmm6,xmm8 |
| movdqa xmm8,xmm5 |
| psrld xmm8,25 |
| pslld xmm5,32-25 |
| pxor xmm5,xmm8 |
| movdqa xmm8,xmm4 |
| psrld xmm8,25 |
| pslld xmm4,32-25 |
| pxor xmm4,xmm8 |
| movdqa xmm8,XMMWORD[((160+80))+rbp] |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| DB 102,15,58,15,255,4 |
| DB 102,69,15,58,15,219,8 |
| DB 102,69,15,58,15,255,12 |
| DB 102,15,58,15,246,4 |
| DB 102,69,15,58,15,210,8 |
| DB 102,69,15,58,15,246,12 |
| DB 102,15,58,15,237,4 |
| DB 102,69,15,58,15,201,8 |
| DB 102,69,15,58,15,237,12 |
| DB 102,15,58,15,228,4 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,12 |
| movdqa XMMWORD[(160+80)+rbp],xmm8 |
| movdqa xmm8,XMMWORD[$L$rol16] |
| paddd xmm3,xmm7 |
| paddd xmm2,xmm6 |
| paddd xmm1,xmm5 |
| paddd xmm0,xmm4 |
| pxor xmm15,xmm3 |
| pxor xmm14,xmm2 |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| pxor xmm13,xmm1 |
| pxor xmm12,xmm0 |
| DB 102,69,15,56,0,248 |
| DB 102,69,15,56,0,240 |
| DB 102,69,15,56,0,232 |
| DB 102,69,15,56,0,224 |
| movdqa xmm8,XMMWORD[((160+80))+rbp] |
| paddd xmm11,xmm15 |
| paddd xmm10,xmm14 |
| paddd xmm9,xmm13 |
| paddd xmm8,xmm12 |
| pxor xmm7,xmm11 |
| pxor xmm6,xmm10 |
| pxor xmm5,xmm9 |
| pxor xmm4,xmm8 |
| movdqa XMMWORD[(160+80)+rbp],xmm8 |
| movdqa xmm8,xmm7 |
| psrld xmm8,20 |
| pslld xmm7,32-20 |
| pxor xmm7,xmm8 |
| movdqa xmm8,xmm6 |
| psrld xmm8,20 |
| pslld xmm6,32-20 |
| pxor xmm6,xmm8 |
| movdqa xmm8,xmm5 |
| psrld xmm8,20 |
| pslld xmm5,32-20 |
| pxor xmm5,xmm8 |
| movdqa xmm8,xmm4 |
| psrld xmm8,20 |
| pslld xmm4,32-20 |
| pxor xmm4,xmm8 |
| movdqa xmm8,XMMWORD[$L$rol8] |
| paddd xmm3,xmm7 |
| paddd xmm2,xmm6 |
| paddd xmm1,xmm5 |
| paddd xmm0,xmm4 |
| pxor xmm15,xmm3 |
| pxor xmm14,xmm2 |
| pxor xmm13,xmm1 |
| pxor xmm12,xmm0 |
| DB 102,69,15,56,0,248 |
| DB 102,69,15,56,0,240 |
| DB 102,69,15,56,0,232 |
| DB 102,69,15,56,0,224 |
| movdqa xmm8,XMMWORD[((160+80))+rbp] |
| paddd xmm11,xmm15 |
| paddd xmm10,xmm14 |
| paddd xmm9,xmm13 |
| paddd xmm8,xmm12 |
| pxor xmm7,xmm11 |
| pxor xmm6,xmm10 |
| pxor xmm5,xmm9 |
| pxor xmm4,xmm8 |
| movdqa XMMWORD[(160+80)+rbp],xmm8 |
| movdqa xmm8,xmm7 |
| psrld xmm8,25 |
| pslld xmm7,32-25 |
| pxor xmm7,xmm8 |
| movdqa xmm8,xmm6 |
| psrld xmm8,25 |
| pslld xmm6,32-25 |
| pxor xmm6,xmm8 |
| movdqa xmm8,xmm5 |
| psrld xmm8,25 |
| pslld xmm5,32-25 |
| pxor xmm5,xmm8 |
| movdqa xmm8,xmm4 |
| psrld xmm8,25 |
| pslld xmm4,32-25 |
| pxor xmm4,xmm8 |
| movdqa xmm8,XMMWORD[((160+80))+rbp] |
| DB 102,15,58,15,255,12 |
| DB 102,69,15,58,15,219,8 |
| DB 102,69,15,58,15,255,4 |
| DB 102,15,58,15,246,12 |
| DB 102,69,15,58,15,210,8 |
| DB 102,69,15,58,15,246,4 |
| DB 102,15,58,15,237,12 |
| DB 102,69,15,58,15,201,8 |
| DB 102,69,15,58,15,237,4 |
| DB 102,15,58,15,228,12 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,4 |
| |
| dec rcx |
| jge NEAR $L$open_sse_main_loop_rounds |
| add r10,QWORD[((0+0))+r8] |
| adc r11,QWORD[((8+0))+r8] |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| lea r8,[16+r8] |
| cmp rcx,-6 |
| jg NEAR $L$open_sse_main_loop_rounds |
| paddd xmm3,XMMWORD[$L$chacha20_consts] |
| paddd xmm7,XMMWORD[((160+48))+rbp] |
| paddd xmm11,XMMWORD[((160+64))+rbp] |
| paddd xmm15,XMMWORD[((160+144))+rbp] |
| paddd xmm2,XMMWORD[$L$chacha20_consts] |
| paddd xmm6,XMMWORD[((160+48))+rbp] |
| paddd xmm10,XMMWORD[((160+64))+rbp] |
| paddd xmm14,XMMWORD[((160+128))+rbp] |
| paddd xmm1,XMMWORD[$L$chacha20_consts] |
| paddd xmm5,XMMWORD[((160+48))+rbp] |
| paddd xmm9,XMMWORD[((160+64))+rbp] |
| paddd xmm13,XMMWORD[((160+112))+rbp] |
| paddd xmm0,XMMWORD[$L$chacha20_consts] |
| paddd xmm4,XMMWORD[((160+48))+rbp] |
| paddd xmm8,XMMWORD[((160+64))+rbp] |
| paddd xmm12,XMMWORD[((160+96))+rbp] |
| movdqa XMMWORD[(160+80)+rbp],xmm12 |
| movdqu xmm12,XMMWORD[((0 + 0))+rsi] |
| pxor xmm12,xmm3 |
| movdqu XMMWORD[(0 + 0)+rdi],xmm12 |
| movdqu xmm12,XMMWORD[((16 + 0))+rsi] |
| pxor xmm12,xmm7 |
| movdqu XMMWORD[(16 + 0)+rdi],xmm12 |
| movdqu xmm12,XMMWORD[((32 + 0))+rsi] |
| pxor xmm12,xmm11 |
| movdqu XMMWORD[(32 + 0)+rdi],xmm12 |
| movdqu xmm12,XMMWORD[((48 + 0))+rsi] |
| pxor xmm12,xmm15 |
| movdqu XMMWORD[(48 + 0)+rdi],xmm12 |
| movdqu xmm3,XMMWORD[((0 + 64))+rsi] |
| movdqu xmm7,XMMWORD[((16 + 64))+rsi] |
| movdqu xmm11,XMMWORD[((32 + 64))+rsi] |
| movdqu xmm15,XMMWORD[((48 + 64))+rsi] |
| pxor xmm2,xmm3 |
| pxor xmm6,xmm7 |
| pxor xmm10,xmm11 |
| pxor xmm15,xmm14 |
| movdqu XMMWORD[(0 + 64)+rdi],xmm2 |
| movdqu XMMWORD[(16 + 64)+rdi],xmm6 |
| movdqu XMMWORD[(32 + 64)+rdi],xmm10 |
| movdqu XMMWORD[(48 + 64)+rdi],xmm15 |
| movdqu xmm3,XMMWORD[((0 + 128))+rsi] |
| movdqu xmm7,XMMWORD[((16 + 128))+rsi] |
| movdqu xmm11,XMMWORD[((32 + 128))+rsi] |
| movdqu xmm15,XMMWORD[((48 + 128))+rsi] |
| pxor xmm1,xmm3 |
| pxor xmm5,xmm7 |
| pxor xmm9,xmm11 |
| pxor xmm15,xmm13 |
| movdqu XMMWORD[(0 + 128)+rdi],xmm1 |
| movdqu XMMWORD[(16 + 128)+rdi],xmm5 |
| movdqu XMMWORD[(32 + 128)+rdi],xmm9 |
| movdqu XMMWORD[(48 + 128)+rdi],xmm15 |
| movdqu xmm3,XMMWORD[((0 + 192))+rsi] |
| movdqu xmm7,XMMWORD[((16 + 192))+rsi] |
| movdqu xmm11,XMMWORD[((32 + 192))+rsi] |
| movdqu xmm15,XMMWORD[((48 + 192))+rsi] |
| pxor xmm0,xmm3 |
| pxor xmm4,xmm7 |
| pxor xmm8,xmm11 |
| pxor xmm15,XMMWORD[((160+80))+rbp] |
| movdqu XMMWORD[(0 + 192)+rdi],xmm0 |
| movdqu XMMWORD[(16 + 192)+rdi],xmm4 |
| movdqu XMMWORD[(32 + 192)+rdi],xmm8 |
| movdqu XMMWORD[(48 + 192)+rdi],xmm15 |
| |
| lea rsi,[256+rsi] |
| lea rdi,[256+rdi] |
| sub rbx,16*16 |
| jmp NEAR $L$open_sse_main_loop |
| $L$open_sse_tail: |
| |
| test rbx,rbx |
| jz NEAR $L$open_sse_finalize |
| cmp rbx,12*16 |
| ja NEAR $L$open_sse_tail_256 |
| cmp rbx,8*16 |
| ja NEAR $L$open_sse_tail_192 |
| cmp rbx,4*16 |
| ja NEAR $L$open_sse_tail_128 |
| movdqa xmm0,XMMWORD[$L$chacha20_consts] |
| movdqa xmm4,XMMWORD[((160+48))+rbp] |
| movdqa xmm8,XMMWORD[((160+64))+rbp] |
| movdqa xmm12,XMMWORD[((160+96))+rbp] |
| paddd xmm12,XMMWORD[$L$sse_inc] |
| movdqa XMMWORD[(160+96)+rbp],xmm12 |
| |
| xor r8,r8 |
| mov rcx,rbx |
| cmp rcx,16 |
| jb NEAR $L$open_sse_tail_64_rounds |
| $L$open_sse_tail_64_rounds_and_x1hash: |
| add r10,QWORD[((0+0))+r8*1+rsi] |
| adc r11,QWORD[((8+0))+r8*1+rsi] |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| sub rcx,16 |
| $L$open_sse_tail_64_rounds: |
| add r8,16 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol16] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,12 |
| psrld xmm4,20 |
| pxor xmm4,xmm3 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol8] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,7 |
| psrld xmm4,25 |
| pxor xmm4,xmm3 |
| DB 102,15,58,15,228,4 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,12 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol16] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,12 |
| psrld xmm4,20 |
| pxor xmm4,xmm3 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol8] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,7 |
| psrld xmm4,25 |
| pxor xmm4,xmm3 |
| DB 102,15,58,15,228,12 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,4 |
| |
| cmp rcx,16 |
| jae NEAR $L$open_sse_tail_64_rounds_and_x1hash |
| cmp r8,10*16 |
| jne NEAR $L$open_sse_tail_64_rounds |
| paddd xmm0,XMMWORD[$L$chacha20_consts] |
| paddd xmm4,XMMWORD[((160+48))+rbp] |
| paddd xmm8,XMMWORD[((160+64))+rbp] |
| paddd xmm12,XMMWORD[((160+96))+rbp] |
| |
| jmp NEAR $L$open_sse_tail_64_dec_loop |
| |
| $L$open_sse_tail_128: |
| movdqa xmm0,XMMWORD[$L$chacha20_consts] |
| movdqa xmm4,XMMWORD[((160+48))+rbp] |
| movdqa xmm8,XMMWORD[((160+64))+rbp] |
| movdqa xmm1,xmm0 |
| movdqa xmm5,xmm4 |
| movdqa xmm9,xmm8 |
| movdqa xmm13,XMMWORD[((160+96))+rbp] |
| paddd xmm13,XMMWORD[$L$sse_inc] |
| movdqa xmm12,xmm13 |
| paddd xmm12,XMMWORD[$L$sse_inc] |
| movdqa XMMWORD[(160+96)+rbp],xmm12 |
| movdqa XMMWORD[(160+112)+rbp],xmm13 |
| |
| mov rcx,rbx |
| and rcx,-16 |
| xor r8,r8 |
| $L$open_sse_tail_128_rounds_and_x1hash: |
| add r10,QWORD[((0+0))+r8*1+rsi] |
| adc r11,QWORD[((8+0))+r8*1+rsi] |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| $L$open_sse_tail_128_rounds: |
| add r8,16 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol16] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,12 |
| psrld xmm4,20 |
| pxor xmm4,xmm3 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol8] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,7 |
| psrld xmm4,25 |
| pxor xmm4,xmm3 |
| DB 102,15,58,15,228,4 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,12 |
| paddd xmm1,xmm5 |
| pxor xmm13,xmm1 |
| pshufb xmm13,XMMWORD[$L$rol16] |
| paddd xmm9,xmm13 |
| pxor xmm5,xmm9 |
| movdqa xmm3,xmm5 |
| pslld xmm3,12 |
| psrld xmm5,20 |
| pxor xmm5,xmm3 |
| paddd xmm1,xmm5 |
| pxor xmm13,xmm1 |
| pshufb xmm13,XMMWORD[$L$rol8] |
| paddd xmm9,xmm13 |
| pxor xmm5,xmm9 |
| movdqa xmm3,xmm5 |
| pslld xmm3,7 |
| psrld xmm5,25 |
| pxor xmm5,xmm3 |
| DB 102,15,58,15,237,4 |
| DB 102,69,15,58,15,201,8 |
| DB 102,69,15,58,15,237,12 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol16] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,12 |
| psrld xmm4,20 |
| pxor xmm4,xmm3 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol8] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,7 |
| psrld xmm4,25 |
| pxor xmm4,xmm3 |
| DB 102,15,58,15,228,12 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,4 |
| paddd xmm1,xmm5 |
| pxor xmm13,xmm1 |
| pshufb xmm13,XMMWORD[$L$rol16] |
| paddd xmm9,xmm13 |
| pxor xmm5,xmm9 |
| movdqa xmm3,xmm5 |
| pslld xmm3,12 |
| psrld xmm5,20 |
| pxor xmm5,xmm3 |
| paddd xmm1,xmm5 |
| pxor xmm13,xmm1 |
| pshufb xmm13,XMMWORD[$L$rol8] |
| paddd xmm9,xmm13 |
| pxor xmm5,xmm9 |
| movdqa xmm3,xmm5 |
| pslld xmm3,7 |
| psrld xmm5,25 |
| pxor xmm5,xmm3 |
| DB 102,15,58,15,237,12 |
| DB 102,69,15,58,15,201,8 |
| DB 102,69,15,58,15,237,4 |
| |
| cmp r8,rcx |
| jb NEAR $L$open_sse_tail_128_rounds_and_x1hash |
| cmp r8,10*16 |
| jne NEAR $L$open_sse_tail_128_rounds |
| paddd xmm1,XMMWORD[$L$chacha20_consts] |
| paddd xmm5,XMMWORD[((160+48))+rbp] |
| paddd xmm9,XMMWORD[((160+64))+rbp] |
| paddd xmm13,XMMWORD[((160+112))+rbp] |
| paddd xmm0,XMMWORD[$L$chacha20_consts] |
| paddd xmm4,XMMWORD[((160+48))+rbp] |
| paddd xmm8,XMMWORD[((160+64))+rbp] |
| paddd xmm12,XMMWORD[((160+96))+rbp] |
| movdqu xmm3,XMMWORD[((0 + 0))+rsi] |
| movdqu xmm7,XMMWORD[((16 + 0))+rsi] |
| movdqu xmm11,XMMWORD[((32 + 0))+rsi] |
| movdqu xmm15,XMMWORD[((48 + 0))+rsi] |
| pxor xmm1,xmm3 |
| pxor xmm5,xmm7 |
| pxor xmm9,xmm11 |
| pxor xmm15,xmm13 |
| movdqu XMMWORD[(0 + 0)+rdi],xmm1 |
| movdqu XMMWORD[(16 + 0)+rdi],xmm5 |
| movdqu XMMWORD[(32 + 0)+rdi],xmm9 |
| movdqu XMMWORD[(48 + 0)+rdi],xmm15 |
| |
| sub rbx,4*16 |
| lea rsi,[64+rsi] |
| lea rdi,[64+rdi] |
| jmp NEAR $L$open_sse_tail_64_dec_loop |
| |
| $L$open_sse_tail_192: |
| movdqa xmm0,XMMWORD[$L$chacha20_consts] |
| movdqa xmm4,XMMWORD[((160+48))+rbp] |
| movdqa xmm8,XMMWORD[((160+64))+rbp] |
| movdqa xmm1,xmm0 |
| movdqa xmm5,xmm4 |
| movdqa xmm9,xmm8 |
| movdqa xmm2,xmm0 |
| movdqa xmm6,xmm4 |
| movdqa xmm10,xmm8 |
| movdqa xmm14,XMMWORD[((160+96))+rbp] |
| paddd xmm14,XMMWORD[$L$sse_inc] |
| movdqa xmm13,xmm14 |
| paddd xmm13,XMMWORD[$L$sse_inc] |
| movdqa xmm12,xmm13 |
| paddd xmm12,XMMWORD[$L$sse_inc] |
| movdqa XMMWORD[(160+96)+rbp],xmm12 |
| movdqa XMMWORD[(160+112)+rbp],xmm13 |
| movdqa XMMWORD[(160+128)+rbp],xmm14 |
| |
| mov rcx,rbx |
| mov r8,10*16 |
| cmp rcx,10*16 |
| cmovg rcx,r8 |
| and rcx,-16 |
| xor r8,r8 |
| $L$open_sse_tail_192_rounds_and_x1hash: |
| add r10,QWORD[((0+0))+r8*1+rsi] |
| adc r11,QWORD[((8+0))+r8*1+rsi] |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| $L$open_sse_tail_192_rounds: |
| add r8,16 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol16] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,12 |
| psrld xmm4,20 |
| pxor xmm4,xmm3 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol8] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,7 |
| psrld xmm4,25 |
| pxor xmm4,xmm3 |
| DB 102,15,58,15,228,4 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,12 |
| paddd xmm1,xmm5 |
| pxor xmm13,xmm1 |
| pshufb xmm13,XMMWORD[$L$rol16] |
| paddd xmm9,xmm13 |
| pxor xmm5,xmm9 |
| movdqa xmm3,xmm5 |
| pslld xmm3,12 |
| psrld xmm5,20 |
| pxor xmm5,xmm3 |
| paddd xmm1,xmm5 |
| pxor xmm13,xmm1 |
| pshufb xmm13,XMMWORD[$L$rol8] |
| paddd xmm9,xmm13 |
| pxor xmm5,xmm9 |
| movdqa xmm3,xmm5 |
| pslld xmm3,7 |
| psrld xmm5,25 |
| pxor xmm5,xmm3 |
| DB 102,15,58,15,237,4 |
| DB 102,69,15,58,15,201,8 |
| DB 102,69,15,58,15,237,12 |
| paddd xmm2,xmm6 |
| pxor xmm14,xmm2 |
| pshufb xmm14,XMMWORD[$L$rol16] |
| paddd xmm10,xmm14 |
| pxor xmm6,xmm10 |
| movdqa xmm3,xmm6 |
| pslld xmm3,12 |
| psrld xmm6,20 |
| pxor xmm6,xmm3 |
| paddd xmm2,xmm6 |
| pxor xmm14,xmm2 |
| pshufb xmm14,XMMWORD[$L$rol8] |
| paddd xmm10,xmm14 |
| pxor xmm6,xmm10 |
| movdqa xmm3,xmm6 |
| pslld xmm3,7 |
| psrld xmm6,25 |
| pxor xmm6,xmm3 |
| DB 102,15,58,15,246,4 |
| DB 102,69,15,58,15,210,8 |
| DB 102,69,15,58,15,246,12 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol16] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,12 |
| psrld xmm4,20 |
| pxor xmm4,xmm3 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol8] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,7 |
| psrld xmm4,25 |
| pxor xmm4,xmm3 |
| DB 102,15,58,15,228,12 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,4 |
| paddd xmm1,xmm5 |
| pxor xmm13,xmm1 |
| pshufb xmm13,XMMWORD[$L$rol16] |
| paddd xmm9,xmm13 |
| pxor xmm5,xmm9 |
| movdqa xmm3,xmm5 |
| pslld xmm3,12 |
| psrld xmm5,20 |
| pxor xmm5,xmm3 |
| paddd xmm1,xmm5 |
| pxor xmm13,xmm1 |
| pshufb xmm13,XMMWORD[$L$rol8] |
| paddd xmm9,xmm13 |
| pxor xmm5,xmm9 |
| movdqa xmm3,xmm5 |
| pslld xmm3,7 |
| psrld xmm5,25 |
| pxor xmm5,xmm3 |
| DB 102,15,58,15,237,12 |
| DB 102,69,15,58,15,201,8 |
| DB 102,69,15,58,15,237,4 |
| paddd xmm2,xmm6 |
| pxor xmm14,xmm2 |
| pshufb xmm14,XMMWORD[$L$rol16] |
| paddd xmm10,xmm14 |
| pxor xmm6,xmm10 |
| movdqa xmm3,xmm6 |
| pslld xmm3,12 |
| psrld xmm6,20 |
| pxor xmm6,xmm3 |
| paddd xmm2,xmm6 |
| pxor xmm14,xmm2 |
| pshufb xmm14,XMMWORD[$L$rol8] |
| paddd xmm10,xmm14 |
| pxor xmm6,xmm10 |
| movdqa xmm3,xmm6 |
| pslld xmm3,7 |
| psrld xmm6,25 |
| pxor xmm6,xmm3 |
| DB 102,15,58,15,246,12 |
| DB 102,69,15,58,15,210,8 |
| DB 102,69,15,58,15,246,4 |
| |
| cmp r8,rcx |
| jb NEAR $L$open_sse_tail_192_rounds_and_x1hash |
| cmp r8,10*16 |
| jne NEAR $L$open_sse_tail_192_rounds |
| cmp rbx,11*16 |
| jb NEAR $L$open_sse_tail_192_finish |
| add r10,QWORD[((0+160))+rsi] |
| adc r11,QWORD[((8+160))+rsi] |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| cmp rbx,12*16 |
| jb NEAR $L$open_sse_tail_192_finish |
| add r10,QWORD[((0+176))+rsi] |
| adc r11,QWORD[((8+176))+rsi] |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| $L$open_sse_tail_192_finish: |
| paddd xmm2,XMMWORD[$L$chacha20_consts] |
| paddd xmm6,XMMWORD[((160+48))+rbp] |
| paddd xmm10,XMMWORD[((160+64))+rbp] |
| paddd xmm14,XMMWORD[((160+128))+rbp] |
| paddd xmm1,XMMWORD[$L$chacha20_consts] |
| paddd xmm5,XMMWORD[((160+48))+rbp] |
| paddd xmm9,XMMWORD[((160+64))+rbp] |
| paddd xmm13,XMMWORD[((160+112))+rbp] |
| paddd xmm0,XMMWORD[$L$chacha20_consts] |
| paddd xmm4,XMMWORD[((160+48))+rbp] |
| paddd xmm8,XMMWORD[((160+64))+rbp] |
| paddd xmm12,XMMWORD[((160+96))+rbp] |
| movdqu xmm3,XMMWORD[((0 + 0))+rsi] |
| movdqu xmm7,XMMWORD[((16 + 0))+rsi] |
| movdqu xmm11,XMMWORD[((32 + 0))+rsi] |
| movdqu xmm15,XMMWORD[((48 + 0))+rsi] |
| pxor xmm2,xmm3 |
| pxor xmm6,xmm7 |
| pxor xmm10,xmm11 |
| pxor xmm15,xmm14 |
| movdqu XMMWORD[(0 + 0)+rdi],xmm2 |
| movdqu XMMWORD[(16 + 0)+rdi],xmm6 |
| movdqu XMMWORD[(32 + 0)+rdi],xmm10 |
| movdqu XMMWORD[(48 + 0)+rdi],xmm15 |
| movdqu xmm3,XMMWORD[((0 + 64))+rsi] |
| movdqu xmm7,XMMWORD[((16 + 64))+rsi] |
| movdqu xmm11,XMMWORD[((32 + 64))+rsi] |
| movdqu xmm15,XMMWORD[((48 + 64))+rsi] |
| pxor xmm1,xmm3 |
| pxor xmm5,xmm7 |
| pxor xmm9,xmm11 |
| pxor xmm15,xmm13 |
| movdqu XMMWORD[(0 + 64)+rdi],xmm1 |
| movdqu XMMWORD[(16 + 64)+rdi],xmm5 |
| movdqu XMMWORD[(32 + 64)+rdi],xmm9 |
| movdqu XMMWORD[(48 + 64)+rdi],xmm15 |
| |
| sub rbx,8*16 |
| lea rsi,[128+rsi] |
| lea rdi,[128+rdi] |
| jmp NEAR $L$open_sse_tail_64_dec_loop |
| |
| $L$open_sse_tail_256: |
| movdqa xmm0,XMMWORD[$L$chacha20_consts] |
| movdqa xmm4,XMMWORD[((160+48))+rbp] |
| movdqa xmm8,XMMWORD[((160+64))+rbp] |
| movdqa xmm1,xmm0 |
| movdqa xmm5,xmm4 |
| movdqa xmm9,xmm8 |
| movdqa xmm2,xmm0 |
| movdqa xmm6,xmm4 |
| movdqa xmm10,xmm8 |
| movdqa xmm3,xmm0 |
| movdqa xmm7,xmm4 |
| movdqa xmm11,xmm8 |
| movdqa xmm15,XMMWORD[((160+96))+rbp] |
| paddd xmm15,XMMWORD[$L$sse_inc] |
| movdqa xmm14,xmm15 |
| paddd xmm14,XMMWORD[$L$sse_inc] |
| movdqa xmm13,xmm14 |
| paddd xmm13,XMMWORD[$L$sse_inc] |
| movdqa xmm12,xmm13 |
| paddd xmm12,XMMWORD[$L$sse_inc] |
| movdqa XMMWORD[(160+96)+rbp],xmm12 |
| movdqa XMMWORD[(160+112)+rbp],xmm13 |
| movdqa XMMWORD[(160+128)+rbp],xmm14 |
| movdqa XMMWORD[(160+144)+rbp],xmm15 |
| |
| xor r8,r8 |
| $L$open_sse_tail_256_rounds_and_x1hash: |
| add r10,QWORD[((0+0))+r8*1+rsi] |
| adc r11,QWORD[((8+0))+r8*1+rsi] |
| adc r12,1 |
| movdqa XMMWORD[(160+80)+rbp],xmm11 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol16] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm11,xmm4 |
| pslld xmm11,12 |
| psrld xmm4,20 |
| pxor xmm4,xmm11 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol8] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm11,xmm4 |
| pslld xmm11,7 |
| psrld xmm4,25 |
| pxor xmm4,xmm11 |
| DB 102,15,58,15,228,4 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,12 |
| paddd xmm1,xmm5 |
| pxor xmm13,xmm1 |
| pshufb xmm13,XMMWORD[$L$rol16] |
| paddd xmm9,xmm13 |
| pxor xmm5,xmm9 |
| movdqa xmm11,xmm5 |
| pslld xmm11,12 |
| psrld xmm5,20 |
| pxor xmm5,xmm11 |
| paddd xmm1,xmm5 |
| pxor xmm13,xmm1 |
| pshufb xmm13,XMMWORD[$L$rol8] |
| paddd xmm9,xmm13 |
| pxor xmm5,xmm9 |
| movdqa xmm11,xmm5 |
| pslld xmm11,7 |
| psrld xmm5,25 |
| pxor xmm5,xmm11 |
| DB 102,15,58,15,237,4 |
| DB 102,69,15,58,15,201,8 |
| DB 102,69,15,58,15,237,12 |
| paddd xmm2,xmm6 |
| pxor xmm14,xmm2 |
| pshufb xmm14,XMMWORD[$L$rol16] |
| paddd xmm10,xmm14 |
| pxor xmm6,xmm10 |
| movdqa xmm11,xmm6 |
| pslld xmm11,12 |
| psrld xmm6,20 |
| pxor xmm6,xmm11 |
| paddd xmm2,xmm6 |
| pxor xmm14,xmm2 |
| pshufb xmm14,XMMWORD[$L$rol8] |
| paddd xmm10,xmm14 |
| pxor xmm6,xmm10 |
| movdqa xmm11,xmm6 |
| pslld xmm11,7 |
| psrld xmm6,25 |
| pxor xmm6,xmm11 |
| DB 102,15,58,15,246,4 |
| DB 102,69,15,58,15,210,8 |
| DB 102,69,15,58,15,246,12 |
| movdqa xmm11,XMMWORD[((160+80))+rbp] |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| movdqa XMMWORD[(160+80)+rbp],xmm9 |
| paddd xmm3,xmm7 |
| pxor xmm15,xmm3 |
| pshufb xmm15,XMMWORD[$L$rol16] |
| paddd xmm11,xmm15 |
| pxor xmm7,xmm11 |
| movdqa xmm9,xmm7 |
| pslld xmm9,12 |
| psrld xmm7,20 |
| pxor xmm7,xmm9 |
| paddd xmm3,xmm7 |
| pxor xmm15,xmm3 |
| pshufb xmm15,XMMWORD[$L$rol8] |
| paddd xmm11,xmm15 |
| pxor xmm7,xmm11 |
| movdqa xmm9,xmm7 |
| pslld xmm9,7 |
| psrld xmm7,25 |
| pxor xmm7,xmm9 |
| DB 102,15,58,15,255,4 |
| DB 102,69,15,58,15,219,8 |
| DB 102,69,15,58,15,255,12 |
| movdqa xmm9,XMMWORD[((160+80))+rbp] |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| movdqa XMMWORD[(160+80)+rbp],xmm11 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol16] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm11,xmm4 |
| pslld xmm11,12 |
| psrld xmm4,20 |
| pxor xmm4,xmm11 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol8] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm11,xmm4 |
| pslld xmm11,7 |
| psrld xmm4,25 |
| pxor xmm4,xmm11 |
| DB 102,15,58,15,228,12 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,4 |
| paddd xmm1,xmm5 |
| pxor xmm13,xmm1 |
| pshufb xmm13,XMMWORD[$L$rol16] |
| paddd xmm9,xmm13 |
| pxor xmm5,xmm9 |
| movdqa xmm11,xmm5 |
| pslld xmm11,12 |
| psrld xmm5,20 |
| pxor xmm5,xmm11 |
| paddd xmm1,xmm5 |
| pxor xmm13,xmm1 |
| pshufb xmm13,XMMWORD[$L$rol8] |
| paddd xmm9,xmm13 |
| pxor xmm5,xmm9 |
| movdqa xmm11,xmm5 |
| pslld xmm11,7 |
| psrld xmm5,25 |
| pxor xmm5,xmm11 |
| DB 102,15,58,15,237,12 |
| DB 102,69,15,58,15,201,8 |
| DB 102,69,15,58,15,237,4 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| paddd xmm2,xmm6 |
| pxor xmm14,xmm2 |
| pshufb xmm14,XMMWORD[$L$rol16] |
| paddd xmm10,xmm14 |
| pxor xmm6,xmm10 |
| movdqa xmm11,xmm6 |
| pslld xmm11,12 |
| psrld xmm6,20 |
| pxor xmm6,xmm11 |
| paddd xmm2,xmm6 |
| pxor xmm14,xmm2 |
| pshufb xmm14,XMMWORD[$L$rol8] |
| paddd xmm10,xmm14 |
| pxor xmm6,xmm10 |
| movdqa xmm11,xmm6 |
| pslld xmm11,7 |
| psrld xmm6,25 |
| pxor xmm6,xmm11 |
| DB 102,15,58,15,246,12 |
| DB 102,69,15,58,15,210,8 |
| DB 102,69,15,58,15,246,4 |
| movdqa xmm11,XMMWORD[((160+80))+rbp] |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| movdqa XMMWORD[(160+80)+rbp],xmm9 |
| paddd xmm3,xmm7 |
| pxor xmm15,xmm3 |
| pshufb xmm15,XMMWORD[$L$rol16] |
| paddd xmm11,xmm15 |
| pxor xmm7,xmm11 |
| movdqa xmm9,xmm7 |
| pslld xmm9,12 |
| psrld xmm7,20 |
| pxor xmm7,xmm9 |
| paddd xmm3,xmm7 |
| pxor xmm15,xmm3 |
| pshufb xmm15,XMMWORD[$L$rol8] |
| paddd xmm11,xmm15 |
| pxor xmm7,xmm11 |
| movdqa xmm9,xmm7 |
| pslld xmm9,7 |
| psrld xmm7,25 |
| pxor xmm7,xmm9 |
| DB 102,15,58,15,255,12 |
| DB 102,69,15,58,15,219,8 |
| DB 102,69,15,58,15,255,4 |
| movdqa xmm9,XMMWORD[((160+80))+rbp] |
| |
| add r8,16 |
| cmp r8,10*16 |
| jb NEAR $L$open_sse_tail_256_rounds_and_x1hash |
| |
| mov rcx,rbx |
| and rcx,-16 |
| $L$open_sse_tail_256_hash: |
| add r10,QWORD[((0+0))+r8*1+rsi] |
| adc r11,QWORD[((8+0))+r8*1+rsi] |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| add r8,16 |
| cmp r8,rcx |
| jb NEAR $L$open_sse_tail_256_hash |
| paddd xmm3,XMMWORD[$L$chacha20_consts] |
| paddd xmm7,XMMWORD[((160+48))+rbp] |
| paddd xmm11,XMMWORD[((160+64))+rbp] |
| paddd xmm15,XMMWORD[((160+144))+rbp] |
| paddd xmm2,XMMWORD[$L$chacha20_consts] |
| paddd xmm6,XMMWORD[((160+48))+rbp] |
| paddd xmm10,XMMWORD[((160+64))+rbp] |
| paddd xmm14,XMMWORD[((160+128))+rbp] |
| paddd xmm1,XMMWORD[$L$chacha20_consts] |
| paddd xmm5,XMMWORD[((160+48))+rbp] |
| paddd xmm9,XMMWORD[((160+64))+rbp] |
| paddd xmm13,XMMWORD[((160+112))+rbp] |
| paddd xmm0,XMMWORD[$L$chacha20_consts] |
| paddd xmm4,XMMWORD[((160+48))+rbp] |
| paddd xmm8,XMMWORD[((160+64))+rbp] |
| paddd xmm12,XMMWORD[((160+96))+rbp] |
| movdqa XMMWORD[(160+80)+rbp],xmm12 |
| movdqu xmm12,XMMWORD[((0 + 0))+rsi] |
| pxor xmm12,xmm3 |
| movdqu XMMWORD[(0 + 0)+rdi],xmm12 |
| movdqu xmm12,XMMWORD[((16 + 0))+rsi] |
| pxor xmm12,xmm7 |
| movdqu XMMWORD[(16 + 0)+rdi],xmm12 |
| movdqu xmm12,XMMWORD[((32 + 0))+rsi] |
| pxor xmm12,xmm11 |
| movdqu XMMWORD[(32 + 0)+rdi],xmm12 |
| movdqu xmm12,XMMWORD[((48 + 0))+rsi] |
| pxor xmm12,xmm15 |
| movdqu XMMWORD[(48 + 0)+rdi],xmm12 |
| movdqu xmm3,XMMWORD[((0 + 64))+rsi] |
| movdqu xmm7,XMMWORD[((16 + 64))+rsi] |
| movdqu xmm11,XMMWORD[((32 + 64))+rsi] |
| movdqu xmm15,XMMWORD[((48 + 64))+rsi] |
| pxor xmm2,xmm3 |
| pxor xmm6,xmm7 |
| pxor xmm10,xmm11 |
| pxor xmm15,xmm14 |
| movdqu XMMWORD[(0 + 64)+rdi],xmm2 |
| movdqu XMMWORD[(16 + 64)+rdi],xmm6 |
| movdqu XMMWORD[(32 + 64)+rdi],xmm10 |
| movdqu XMMWORD[(48 + 64)+rdi],xmm15 |
| movdqu xmm3,XMMWORD[((0 + 128))+rsi] |
| movdqu xmm7,XMMWORD[((16 + 128))+rsi] |
| movdqu xmm11,XMMWORD[((32 + 128))+rsi] |
| movdqu xmm15,XMMWORD[((48 + 128))+rsi] |
| pxor xmm1,xmm3 |
| pxor xmm5,xmm7 |
| pxor xmm9,xmm11 |
| pxor xmm15,xmm13 |
| movdqu XMMWORD[(0 + 128)+rdi],xmm1 |
| movdqu XMMWORD[(16 + 128)+rdi],xmm5 |
| movdqu XMMWORD[(32 + 128)+rdi],xmm9 |
| movdqu XMMWORD[(48 + 128)+rdi],xmm15 |
| |
| movdqa xmm12,XMMWORD[((160+80))+rbp] |
| sub rbx,12*16 |
| lea rsi,[192+rsi] |
| lea rdi,[192+rdi] |
| |
| |
| $L$open_sse_tail_64_dec_loop: |
| cmp rbx,16 |
| jb NEAR $L$open_sse_tail_16_init |
| sub rbx,16 |
| movdqu xmm3,XMMWORD[rsi] |
| pxor xmm0,xmm3 |
| movdqu XMMWORD[rdi],xmm0 |
| lea rsi,[16+rsi] |
| lea rdi,[16+rdi] |
| movdqa xmm0,xmm4 |
| movdqa xmm4,xmm8 |
| movdqa xmm8,xmm12 |
| jmp NEAR $L$open_sse_tail_64_dec_loop |
| $L$open_sse_tail_16_init: |
| movdqa xmm1,xmm0 |
| |
| |
| $L$open_sse_tail_16: |
| test rbx,rbx |
| jz NEAR $L$open_sse_finalize |
| |
| |
| |
| pxor xmm3,xmm3 |
| lea rsi,[((-1))+rbx*1+rsi] |
| mov r8,rbx |
| $L$open_sse_tail_16_compose: |
| pslldq xmm3,1 |
| pinsrb xmm3,BYTE[rsi],0 |
| sub rsi,1 |
| sub r8,1 |
| jnz NEAR $L$open_sse_tail_16_compose |
| |
| DB 102,73,15,126,221 |
| pextrq r14,xmm3,1 |
| |
| pxor xmm3,xmm1 |
| |
| |
| $L$open_sse_tail_16_extract: |
| pextrb XMMWORD[rdi],xmm3,0 |
| psrldq xmm3,1 |
| add rdi,1 |
| sub rbx,1 |
| jne NEAR $L$open_sse_tail_16_extract |
| |
| add r10,r13 |
| adc r11,r14 |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| |
| $L$open_sse_finalize: |
| add r10,QWORD[((0+160+32))+rbp] |
| adc r11,QWORD[((8+160+32))+rbp] |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| |
| mov r13,r10 |
| mov r14,r11 |
| mov r15,r12 |
| sub r10,-5 |
| sbb r11,-1 |
| sbb r12,3 |
| cmovc r10,r13 |
| cmovc r11,r14 |
| cmovc r12,r15 |
| |
| add r10,QWORD[((0+160+16))+rbp] |
| adc r11,QWORD[((8+160+16))+rbp] |
| |
| movaps xmm6,XMMWORD[((0+0))+rbp] |
| movaps xmm7,XMMWORD[((16+0))+rbp] |
| movaps xmm8,XMMWORD[((32+0))+rbp] |
| movaps xmm9,XMMWORD[((48+0))+rbp] |
| movaps xmm10,XMMWORD[((64+0))+rbp] |
| movaps xmm11,XMMWORD[((80+0))+rbp] |
| movaps xmm12,XMMWORD[((96+0))+rbp] |
| movaps xmm13,XMMWORD[((112+0))+rbp] |
| movaps xmm14,XMMWORD[((128+0))+rbp] |
| movaps xmm15,XMMWORD[((144+0))+rbp] |
| |
| |
| add rsp,288 + 160 + 32 |
| |
| |
| pop r9 |
| |
| mov QWORD[r9],r10 |
| mov QWORD[8+r9],r11 |
| pop r15 |
| |
| pop r14 |
| |
| pop r13 |
| |
| pop r12 |
| |
| pop rbx |
| |
| pop rbp |
| |
| mov rdi,QWORD[8+rsp] ;WIN64 epilogue |
| mov rsi,QWORD[16+rsp] |
| ret |
| |
| $L$open_sse_128: |
| |
| movdqu xmm0,XMMWORD[$L$chacha20_consts] |
| movdqa xmm1,xmm0 |
| movdqa xmm2,xmm0 |
| movdqu xmm4,XMMWORD[r9] |
| movdqa xmm5,xmm4 |
| movdqa xmm6,xmm4 |
| movdqu xmm8,XMMWORD[16+r9] |
| movdqa xmm9,xmm8 |
| movdqa xmm10,xmm8 |
| movdqu xmm12,XMMWORD[32+r9] |
| movdqa xmm13,xmm12 |
| paddd xmm13,XMMWORD[$L$sse_inc] |
| movdqa xmm14,xmm13 |
| paddd xmm14,XMMWORD[$L$sse_inc] |
| movdqa xmm7,xmm4 |
| movdqa xmm11,xmm8 |
| movdqa xmm15,xmm13 |
| mov r10,10 |
| |
| $L$open_sse_128_rounds: |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol16] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,12 |
| psrld xmm4,20 |
| pxor xmm4,xmm3 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol8] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,7 |
| psrld xmm4,25 |
| pxor xmm4,xmm3 |
| DB 102,15,58,15,228,4 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,12 |
| paddd xmm1,xmm5 |
| pxor xmm13,xmm1 |
| pshufb xmm13,XMMWORD[$L$rol16] |
| paddd xmm9,xmm13 |
| pxor xmm5,xmm9 |
| movdqa xmm3,xmm5 |
| pslld xmm3,12 |
| psrld xmm5,20 |
| pxor xmm5,xmm3 |
| paddd xmm1,xmm5 |
| pxor xmm13,xmm1 |
| pshufb xmm13,XMMWORD[$L$rol8] |
| paddd xmm9,xmm13 |
| pxor xmm5,xmm9 |
| movdqa xmm3,xmm5 |
| pslld xmm3,7 |
| psrld xmm5,25 |
| pxor xmm5,xmm3 |
| DB 102,15,58,15,237,4 |
| DB 102,69,15,58,15,201,8 |
| DB 102,69,15,58,15,237,12 |
| paddd xmm2,xmm6 |
| pxor xmm14,xmm2 |
| pshufb xmm14,XMMWORD[$L$rol16] |
| paddd xmm10,xmm14 |
| pxor xmm6,xmm10 |
| movdqa xmm3,xmm6 |
| pslld xmm3,12 |
| psrld xmm6,20 |
| pxor xmm6,xmm3 |
| paddd xmm2,xmm6 |
| pxor xmm14,xmm2 |
| pshufb xmm14,XMMWORD[$L$rol8] |
| paddd xmm10,xmm14 |
| pxor xmm6,xmm10 |
| movdqa xmm3,xmm6 |
| pslld xmm3,7 |
| psrld xmm6,25 |
| pxor xmm6,xmm3 |
| DB 102,15,58,15,246,4 |
| DB 102,69,15,58,15,210,8 |
| DB 102,69,15,58,15,246,12 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol16] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,12 |
| psrld xmm4,20 |
| pxor xmm4,xmm3 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol8] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,7 |
| psrld xmm4,25 |
| pxor xmm4,xmm3 |
| DB 102,15,58,15,228,12 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,4 |
| paddd xmm1,xmm5 |
| pxor xmm13,xmm1 |
| pshufb xmm13,XMMWORD[$L$rol16] |
| paddd xmm9,xmm13 |
| pxor xmm5,xmm9 |
| movdqa xmm3,xmm5 |
| pslld xmm3,12 |
| psrld xmm5,20 |
| pxor xmm5,xmm3 |
| paddd xmm1,xmm5 |
| pxor xmm13,xmm1 |
| pshufb xmm13,XMMWORD[$L$rol8] |
| paddd xmm9,xmm13 |
| pxor xmm5,xmm9 |
| movdqa xmm3,xmm5 |
| pslld xmm3,7 |
| psrld xmm5,25 |
| pxor xmm5,xmm3 |
| DB 102,15,58,15,237,12 |
| DB 102,69,15,58,15,201,8 |
| DB 102,69,15,58,15,237,4 |
| paddd xmm2,xmm6 |
| pxor xmm14,xmm2 |
| pshufb xmm14,XMMWORD[$L$rol16] |
| paddd xmm10,xmm14 |
| pxor xmm6,xmm10 |
| movdqa xmm3,xmm6 |
| pslld xmm3,12 |
| psrld xmm6,20 |
| pxor xmm6,xmm3 |
| paddd xmm2,xmm6 |
| pxor xmm14,xmm2 |
| pshufb xmm14,XMMWORD[$L$rol8] |
| paddd xmm10,xmm14 |
| pxor xmm6,xmm10 |
| movdqa xmm3,xmm6 |
| pslld xmm3,7 |
| psrld xmm6,25 |
| pxor xmm6,xmm3 |
| DB 102,15,58,15,246,12 |
| DB 102,69,15,58,15,210,8 |
| DB 102,69,15,58,15,246,4 |
| |
| dec r10 |
| jnz NEAR $L$open_sse_128_rounds |
| paddd xmm0,XMMWORD[$L$chacha20_consts] |
| paddd xmm1,XMMWORD[$L$chacha20_consts] |
| paddd xmm2,XMMWORD[$L$chacha20_consts] |
| paddd xmm4,xmm7 |
| paddd xmm5,xmm7 |
| paddd xmm6,xmm7 |
| paddd xmm9,xmm11 |
| paddd xmm10,xmm11 |
| paddd xmm13,xmm15 |
| paddd xmm15,XMMWORD[$L$sse_inc] |
| paddd xmm14,xmm15 |
| |
| pand xmm0,XMMWORD[$L$clamp] |
| movdqa XMMWORD[(160+0)+rbp],xmm0 |
| movdqa XMMWORD[(160+16)+rbp],xmm4 |
| |
| mov r8,r8 |
| call poly_hash_ad_internal |
| $L$open_sse_128_xor_hash: |
| cmp rbx,16 |
| jb NEAR $L$open_sse_tail_16 |
| sub rbx,16 |
| add r10,QWORD[((0+0))+rsi] |
| adc r11,QWORD[((8+0))+rsi] |
| adc r12,1 |
| |
| |
| movdqu xmm3,XMMWORD[rsi] |
| pxor xmm1,xmm3 |
| movdqu XMMWORD[rdi],xmm1 |
| lea rsi,[16+rsi] |
| lea rdi,[16+rdi] |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| |
| movdqa xmm1,xmm5 |
| movdqa xmm5,xmm9 |
| movdqa xmm9,xmm13 |
| movdqa xmm13,xmm2 |
| movdqa xmm2,xmm6 |
| movdqa xmm6,xmm10 |
| movdqa xmm10,xmm14 |
| jmp NEAR $L$open_sse_128_xor_hash |
| $L$SEH_end_chacha20_poly1305_open_nohw: |
| |
| |
| |
| |
| |
| |
| |
| |
| global chacha20_poly1305_seal_nohw |
| |
| ALIGN 64 |
| chacha20_poly1305_seal_nohw: |
| mov QWORD[8+rsp],rdi ;WIN64 prologue |
| mov QWORD[16+rsp],rsi |
| mov rax,rsp |
| $L$SEH_begin_chacha20_poly1305_seal_nohw: |
| mov rdi,rcx |
| mov rsi,rdx |
| mov rdx,r8 |
| mov rcx,r9 |
| mov r8,QWORD[40+rsp] |
| mov r9,QWORD[48+rsp] |
| |
| |
| |
| _CET_ENDBR |
| push rbp |
| |
| push rbx |
| |
| push r12 |
| |
| push r13 |
| |
| push r14 |
| |
| push r15 |
| |
| |
| |
| push r9 |
| |
| sub rsp,288 + 160 + 32 |
| |
| lea rbp,[32+rsp] |
| and rbp,-32 |
| |
| movaps XMMWORD[(0+0)+rbp],xmm6 |
| movaps XMMWORD[(16+0)+rbp],xmm7 |
| movaps XMMWORD[(32+0)+rbp],xmm8 |
| movaps XMMWORD[(48+0)+rbp],xmm9 |
| movaps XMMWORD[(64+0)+rbp],xmm10 |
| movaps XMMWORD[(80+0)+rbp],xmm11 |
| movaps XMMWORD[(96+0)+rbp],xmm12 |
| movaps XMMWORD[(112+0)+rbp],xmm13 |
| movaps XMMWORD[(128+0)+rbp],xmm14 |
| movaps XMMWORD[(144+0)+rbp],xmm15 |
| |
| mov rbx,QWORD[56+r9] |
| add rbx,rdx |
| mov QWORD[((0+160+32))+rbp],r8 |
| mov QWORD[((8+160+32))+rbp],rbx |
| mov rbx,rdx |
| |
| cmp rbx,128 |
| jbe NEAR $L$seal_sse_128 |
| |
| movdqa xmm0,XMMWORD[$L$chacha20_consts] |
| movdqu xmm4,XMMWORD[r9] |
| movdqu xmm8,XMMWORD[16+r9] |
| movdqu xmm12,XMMWORD[32+r9] |
| |
| movdqa xmm1,xmm0 |
| movdqa xmm2,xmm0 |
| movdqa xmm3,xmm0 |
| movdqa xmm5,xmm4 |
| movdqa xmm6,xmm4 |
| movdqa xmm7,xmm4 |
| movdqa xmm9,xmm8 |
| movdqa xmm10,xmm8 |
| movdqa xmm11,xmm8 |
| movdqa xmm15,xmm12 |
| paddd xmm12,XMMWORD[$L$sse_inc] |
| movdqa xmm14,xmm12 |
| paddd xmm12,XMMWORD[$L$sse_inc] |
| movdqa xmm13,xmm12 |
| paddd xmm12,XMMWORD[$L$sse_inc] |
| |
| movdqa XMMWORD[(160+48)+rbp],xmm4 |
| movdqa XMMWORD[(160+64)+rbp],xmm8 |
| movdqa XMMWORD[(160+96)+rbp],xmm12 |
| movdqa XMMWORD[(160+112)+rbp],xmm13 |
| movdqa XMMWORD[(160+128)+rbp],xmm14 |
| movdqa XMMWORD[(160+144)+rbp],xmm15 |
| mov r10,10 |
| $L$seal_sse_init_rounds: |
| movdqa XMMWORD[(160+80)+rbp],xmm8 |
| movdqa xmm8,XMMWORD[$L$rol16] |
| paddd xmm3,xmm7 |
| paddd xmm2,xmm6 |
| paddd xmm1,xmm5 |
| paddd xmm0,xmm4 |
| pxor xmm15,xmm3 |
| pxor xmm14,xmm2 |
| pxor xmm13,xmm1 |
| pxor xmm12,xmm0 |
| DB 102,69,15,56,0,248 |
| DB 102,69,15,56,0,240 |
| DB 102,69,15,56,0,232 |
| DB 102,69,15,56,0,224 |
| movdqa xmm8,XMMWORD[((160+80))+rbp] |
| paddd xmm11,xmm15 |
| paddd xmm10,xmm14 |
| paddd xmm9,xmm13 |
| paddd xmm8,xmm12 |
| pxor xmm7,xmm11 |
| pxor xmm6,xmm10 |
| pxor xmm5,xmm9 |
| pxor xmm4,xmm8 |
| movdqa XMMWORD[(160+80)+rbp],xmm8 |
| movdqa xmm8,xmm7 |
| psrld xmm8,20 |
| pslld xmm7,32-20 |
| pxor xmm7,xmm8 |
| movdqa xmm8,xmm6 |
| psrld xmm8,20 |
| pslld xmm6,32-20 |
| pxor xmm6,xmm8 |
| movdqa xmm8,xmm5 |
| psrld xmm8,20 |
| pslld xmm5,32-20 |
| pxor xmm5,xmm8 |
| movdqa xmm8,xmm4 |
| psrld xmm8,20 |
| pslld xmm4,32-20 |
| pxor xmm4,xmm8 |
| movdqa xmm8,XMMWORD[$L$rol8] |
| paddd xmm3,xmm7 |
| paddd xmm2,xmm6 |
| paddd xmm1,xmm5 |
| paddd xmm0,xmm4 |
| pxor xmm15,xmm3 |
| pxor xmm14,xmm2 |
| pxor xmm13,xmm1 |
| pxor xmm12,xmm0 |
| DB 102,69,15,56,0,248 |
| DB 102,69,15,56,0,240 |
| DB 102,69,15,56,0,232 |
| DB 102,69,15,56,0,224 |
| movdqa xmm8,XMMWORD[((160+80))+rbp] |
| paddd xmm11,xmm15 |
| paddd xmm10,xmm14 |
| paddd xmm9,xmm13 |
| paddd xmm8,xmm12 |
| pxor xmm7,xmm11 |
| pxor xmm6,xmm10 |
| pxor xmm5,xmm9 |
| pxor xmm4,xmm8 |
| movdqa XMMWORD[(160+80)+rbp],xmm8 |
| movdqa xmm8,xmm7 |
| psrld xmm8,25 |
| pslld xmm7,32-25 |
| pxor xmm7,xmm8 |
| movdqa xmm8,xmm6 |
| psrld xmm8,25 |
| pslld xmm6,32-25 |
| pxor xmm6,xmm8 |
| movdqa xmm8,xmm5 |
| psrld xmm8,25 |
| pslld xmm5,32-25 |
| pxor xmm5,xmm8 |
| movdqa xmm8,xmm4 |
| psrld xmm8,25 |
| pslld xmm4,32-25 |
| pxor xmm4,xmm8 |
| movdqa xmm8,XMMWORD[((160+80))+rbp] |
| DB 102,15,58,15,255,4 |
| DB 102,69,15,58,15,219,8 |
| DB 102,69,15,58,15,255,12 |
| DB 102,15,58,15,246,4 |
| DB 102,69,15,58,15,210,8 |
| DB 102,69,15,58,15,246,12 |
| DB 102,15,58,15,237,4 |
| DB 102,69,15,58,15,201,8 |
| DB 102,69,15,58,15,237,12 |
| DB 102,15,58,15,228,4 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,12 |
| movdqa XMMWORD[(160+80)+rbp],xmm8 |
| movdqa xmm8,XMMWORD[$L$rol16] |
| paddd xmm3,xmm7 |
| paddd xmm2,xmm6 |
| paddd xmm1,xmm5 |
| paddd xmm0,xmm4 |
| pxor xmm15,xmm3 |
| pxor xmm14,xmm2 |
| pxor xmm13,xmm1 |
| pxor xmm12,xmm0 |
| DB 102,69,15,56,0,248 |
| DB 102,69,15,56,0,240 |
| DB 102,69,15,56,0,232 |
| DB 102,69,15,56,0,224 |
| movdqa xmm8,XMMWORD[((160+80))+rbp] |
| paddd xmm11,xmm15 |
| paddd xmm10,xmm14 |
| paddd xmm9,xmm13 |
| paddd xmm8,xmm12 |
| pxor xmm7,xmm11 |
| pxor xmm6,xmm10 |
| pxor xmm5,xmm9 |
| pxor xmm4,xmm8 |
| movdqa XMMWORD[(160+80)+rbp],xmm8 |
| movdqa xmm8,xmm7 |
| psrld xmm8,20 |
| pslld xmm7,32-20 |
| pxor xmm7,xmm8 |
| movdqa xmm8,xmm6 |
| psrld xmm8,20 |
| pslld xmm6,32-20 |
| pxor xmm6,xmm8 |
| movdqa xmm8,xmm5 |
| psrld xmm8,20 |
| pslld xmm5,32-20 |
| pxor xmm5,xmm8 |
| movdqa xmm8,xmm4 |
| psrld xmm8,20 |
| pslld xmm4,32-20 |
| pxor xmm4,xmm8 |
| movdqa xmm8,XMMWORD[$L$rol8] |
| paddd xmm3,xmm7 |
| paddd xmm2,xmm6 |
| paddd xmm1,xmm5 |
| paddd xmm0,xmm4 |
| pxor xmm15,xmm3 |
| pxor xmm14,xmm2 |
| pxor xmm13,xmm1 |
| pxor xmm12,xmm0 |
| DB 102,69,15,56,0,248 |
| DB 102,69,15,56,0,240 |
| DB 102,69,15,56,0,232 |
| DB 102,69,15,56,0,224 |
| movdqa xmm8,XMMWORD[((160+80))+rbp] |
| paddd xmm11,xmm15 |
| paddd xmm10,xmm14 |
| paddd xmm9,xmm13 |
| paddd xmm8,xmm12 |
| pxor xmm7,xmm11 |
| pxor xmm6,xmm10 |
| pxor xmm5,xmm9 |
| pxor xmm4,xmm8 |
| movdqa XMMWORD[(160+80)+rbp],xmm8 |
| movdqa xmm8,xmm7 |
| psrld xmm8,25 |
| pslld xmm7,32-25 |
| pxor xmm7,xmm8 |
| movdqa xmm8,xmm6 |
| psrld xmm8,25 |
| pslld xmm6,32-25 |
| pxor xmm6,xmm8 |
| movdqa xmm8,xmm5 |
| psrld xmm8,25 |
| pslld xmm5,32-25 |
| pxor xmm5,xmm8 |
| movdqa xmm8,xmm4 |
| psrld xmm8,25 |
| pslld xmm4,32-25 |
| pxor xmm4,xmm8 |
| movdqa xmm8,XMMWORD[((160+80))+rbp] |
| DB 102,15,58,15,255,12 |
| DB 102,69,15,58,15,219,8 |
| DB 102,69,15,58,15,255,4 |
| DB 102,15,58,15,246,12 |
| DB 102,69,15,58,15,210,8 |
| DB 102,69,15,58,15,246,4 |
| DB 102,15,58,15,237,12 |
| DB 102,69,15,58,15,201,8 |
| DB 102,69,15,58,15,237,4 |
| DB 102,15,58,15,228,12 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,4 |
| |
| dec r10 |
| jnz NEAR $L$seal_sse_init_rounds |
| paddd xmm3,XMMWORD[$L$chacha20_consts] |
| paddd xmm7,XMMWORD[((160+48))+rbp] |
| paddd xmm11,XMMWORD[((160+64))+rbp] |
| paddd xmm15,XMMWORD[((160+144))+rbp] |
| paddd xmm2,XMMWORD[$L$chacha20_consts] |
| paddd xmm6,XMMWORD[((160+48))+rbp] |
| paddd xmm10,XMMWORD[((160+64))+rbp] |
| paddd xmm14,XMMWORD[((160+128))+rbp] |
| paddd xmm1,XMMWORD[$L$chacha20_consts] |
| paddd xmm5,XMMWORD[((160+48))+rbp] |
| paddd xmm9,XMMWORD[((160+64))+rbp] |
| paddd xmm13,XMMWORD[((160+112))+rbp] |
| paddd xmm0,XMMWORD[$L$chacha20_consts] |
| paddd xmm4,XMMWORD[((160+48))+rbp] |
| paddd xmm8,XMMWORD[((160+64))+rbp] |
| paddd xmm12,XMMWORD[((160+96))+rbp] |
| |
| |
| pand xmm3,XMMWORD[$L$clamp] |
| movdqa XMMWORD[(160+0)+rbp],xmm3 |
| movdqa XMMWORD[(160+16)+rbp],xmm7 |
| |
| mov r8,r8 |
| call poly_hash_ad_internal |
| movdqu xmm3,XMMWORD[((0 + 0))+rsi] |
| movdqu xmm7,XMMWORD[((16 + 0))+rsi] |
| movdqu xmm11,XMMWORD[((32 + 0))+rsi] |
| movdqu xmm15,XMMWORD[((48 + 0))+rsi] |
| pxor xmm2,xmm3 |
| pxor xmm6,xmm7 |
| pxor xmm10,xmm11 |
| pxor xmm15,xmm14 |
| movdqu XMMWORD[(0 + 0)+rdi],xmm2 |
| movdqu XMMWORD[(16 + 0)+rdi],xmm6 |
| movdqu XMMWORD[(32 + 0)+rdi],xmm10 |
| movdqu XMMWORD[(48 + 0)+rdi],xmm15 |
| movdqu xmm3,XMMWORD[((0 + 64))+rsi] |
| movdqu xmm7,XMMWORD[((16 + 64))+rsi] |
| movdqu xmm11,XMMWORD[((32 + 64))+rsi] |
| movdqu xmm15,XMMWORD[((48 + 64))+rsi] |
| pxor xmm1,xmm3 |
| pxor xmm5,xmm7 |
| pxor xmm9,xmm11 |
| pxor xmm15,xmm13 |
| movdqu XMMWORD[(0 + 64)+rdi],xmm1 |
| movdqu XMMWORD[(16 + 64)+rdi],xmm5 |
| movdqu XMMWORD[(32 + 64)+rdi],xmm9 |
| movdqu XMMWORD[(48 + 64)+rdi],xmm15 |
| |
| cmp rbx,12*16 |
| ja NEAR $L$seal_sse_main_init |
| mov rcx,8*16 |
| sub rbx,8*16 |
| lea rsi,[128+rsi] |
| jmp NEAR $L$seal_sse_128_tail_hash |
| $L$seal_sse_main_init: |
| movdqu xmm3,XMMWORD[((0 + 128))+rsi] |
| movdqu xmm7,XMMWORD[((16 + 128))+rsi] |
| movdqu xmm11,XMMWORD[((32 + 128))+rsi] |
| movdqu xmm15,XMMWORD[((48 + 128))+rsi] |
| pxor xmm0,xmm3 |
| pxor xmm4,xmm7 |
| pxor xmm8,xmm11 |
| pxor xmm15,xmm12 |
| movdqu XMMWORD[(0 + 128)+rdi],xmm0 |
| movdqu XMMWORD[(16 + 128)+rdi],xmm4 |
| movdqu XMMWORD[(32 + 128)+rdi],xmm8 |
| movdqu XMMWORD[(48 + 128)+rdi],xmm15 |
| |
| mov rcx,12*16 |
| sub rbx,12*16 |
| lea rsi,[192+rsi] |
| mov rcx,2 |
| mov r8,8 |
| cmp rbx,4*16 |
| jbe NEAR $L$seal_sse_tail_64 |
| cmp rbx,8*16 |
| jbe NEAR $L$seal_sse_tail_128 |
| cmp rbx,12*16 |
| jbe NEAR $L$seal_sse_tail_192 |
| |
| $L$seal_sse_main_loop: |
| movdqa xmm0,XMMWORD[$L$chacha20_consts] |
| movdqa xmm4,XMMWORD[((160+48))+rbp] |
| movdqa xmm8,XMMWORD[((160+64))+rbp] |
| movdqa xmm1,xmm0 |
| movdqa xmm5,xmm4 |
| movdqa xmm9,xmm8 |
| movdqa xmm2,xmm0 |
| movdqa xmm6,xmm4 |
| movdqa xmm10,xmm8 |
| movdqa xmm3,xmm0 |
| movdqa xmm7,xmm4 |
| movdqa xmm11,xmm8 |
| movdqa xmm15,XMMWORD[((160+96))+rbp] |
| paddd xmm15,XMMWORD[$L$sse_inc] |
| movdqa xmm14,xmm15 |
| paddd xmm14,XMMWORD[$L$sse_inc] |
| movdqa xmm13,xmm14 |
| paddd xmm13,XMMWORD[$L$sse_inc] |
| movdqa xmm12,xmm13 |
| paddd xmm12,XMMWORD[$L$sse_inc] |
| movdqa XMMWORD[(160+96)+rbp],xmm12 |
| movdqa XMMWORD[(160+112)+rbp],xmm13 |
| movdqa XMMWORD[(160+128)+rbp],xmm14 |
| movdqa XMMWORD[(160+144)+rbp],xmm15 |
| |
| ALIGN 32 |
| $L$seal_sse_main_rounds: |
| movdqa XMMWORD[(160+80)+rbp],xmm8 |
| movdqa xmm8,XMMWORD[$L$rol16] |
| paddd xmm3,xmm7 |
| paddd xmm2,xmm6 |
| paddd xmm1,xmm5 |
| paddd xmm0,xmm4 |
| pxor xmm15,xmm3 |
| pxor xmm14,xmm2 |
| pxor xmm13,xmm1 |
| pxor xmm12,xmm0 |
| DB 102,69,15,56,0,248 |
| DB 102,69,15,56,0,240 |
| DB 102,69,15,56,0,232 |
| DB 102,69,15,56,0,224 |
| movdqa xmm8,XMMWORD[((160+80))+rbp] |
| paddd xmm11,xmm15 |
| paddd xmm10,xmm14 |
| paddd xmm9,xmm13 |
| paddd xmm8,xmm12 |
| pxor xmm7,xmm11 |
| add r10,QWORD[((0+0))+rdi] |
| adc r11,QWORD[((8+0))+rdi] |
| adc r12,1 |
| pxor xmm6,xmm10 |
| pxor xmm5,xmm9 |
| pxor xmm4,xmm8 |
| movdqa XMMWORD[(160+80)+rbp],xmm8 |
| movdqa xmm8,xmm7 |
| psrld xmm8,20 |
| pslld xmm7,32-20 |
| pxor xmm7,xmm8 |
| movdqa xmm8,xmm6 |
| psrld xmm8,20 |
| pslld xmm6,32-20 |
| pxor xmm6,xmm8 |
| movdqa xmm8,xmm5 |
| psrld xmm8,20 |
| pslld xmm5,32-20 |
| pxor xmm5,xmm8 |
| movdqa xmm8,xmm4 |
| psrld xmm8,20 |
| pslld xmm4,32-20 |
| pxor xmm4,xmm8 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| movdqa xmm8,XMMWORD[$L$rol8] |
| paddd xmm3,xmm7 |
| paddd xmm2,xmm6 |
| paddd xmm1,xmm5 |
| paddd xmm0,xmm4 |
| pxor xmm15,xmm3 |
| pxor xmm14,xmm2 |
| pxor xmm13,xmm1 |
| pxor xmm12,xmm0 |
| DB 102,69,15,56,0,248 |
| DB 102,69,15,56,0,240 |
| DB 102,69,15,56,0,232 |
| DB 102,69,15,56,0,224 |
| movdqa xmm8,XMMWORD[((160+80))+rbp] |
| paddd xmm11,xmm15 |
| paddd xmm10,xmm14 |
| paddd xmm9,xmm13 |
| paddd xmm8,xmm12 |
| pxor xmm7,xmm11 |
| pxor xmm6,xmm10 |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| pxor xmm5,xmm9 |
| pxor xmm4,xmm8 |
| movdqa XMMWORD[(160+80)+rbp],xmm8 |
| movdqa xmm8,xmm7 |
| psrld xmm8,25 |
| pslld xmm7,32-25 |
| pxor xmm7,xmm8 |
| movdqa xmm8,xmm6 |
| psrld xmm8,25 |
| pslld xmm6,32-25 |
| pxor xmm6,xmm8 |
| movdqa xmm8,xmm5 |
| psrld xmm8,25 |
| pslld xmm5,32-25 |
| pxor xmm5,xmm8 |
| movdqa xmm8,xmm4 |
| psrld xmm8,25 |
| pslld xmm4,32-25 |
| pxor xmm4,xmm8 |
| movdqa xmm8,XMMWORD[((160+80))+rbp] |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| DB 102,15,58,15,255,4 |
| DB 102,69,15,58,15,219,8 |
| DB 102,69,15,58,15,255,12 |
| DB 102,15,58,15,246,4 |
| DB 102,69,15,58,15,210,8 |
| DB 102,69,15,58,15,246,12 |
| DB 102,15,58,15,237,4 |
| DB 102,69,15,58,15,201,8 |
| DB 102,69,15,58,15,237,12 |
| DB 102,15,58,15,228,4 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,12 |
| movdqa XMMWORD[(160+80)+rbp],xmm8 |
| movdqa xmm8,XMMWORD[$L$rol16] |
| paddd xmm3,xmm7 |
| paddd xmm2,xmm6 |
| paddd xmm1,xmm5 |
| paddd xmm0,xmm4 |
| pxor xmm15,xmm3 |
| pxor xmm14,xmm2 |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| pxor xmm13,xmm1 |
| pxor xmm12,xmm0 |
| DB 102,69,15,56,0,248 |
| DB 102,69,15,56,0,240 |
| DB 102,69,15,56,0,232 |
| DB 102,69,15,56,0,224 |
| movdqa xmm8,XMMWORD[((160+80))+rbp] |
| paddd xmm11,xmm15 |
| paddd xmm10,xmm14 |
| paddd xmm9,xmm13 |
| paddd xmm8,xmm12 |
| pxor xmm7,xmm11 |
| pxor xmm6,xmm10 |
| pxor xmm5,xmm9 |
| pxor xmm4,xmm8 |
| movdqa XMMWORD[(160+80)+rbp],xmm8 |
| movdqa xmm8,xmm7 |
| psrld xmm8,20 |
| pslld xmm7,32-20 |
| pxor xmm7,xmm8 |
| movdqa xmm8,xmm6 |
| psrld xmm8,20 |
| pslld xmm6,32-20 |
| pxor xmm6,xmm8 |
| movdqa xmm8,xmm5 |
| psrld xmm8,20 |
| pslld xmm5,32-20 |
| pxor xmm5,xmm8 |
| movdqa xmm8,xmm4 |
| psrld xmm8,20 |
| pslld xmm4,32-20 |
| pxor xmm4,xmm8 |
| movdqa xmm8,XMMWORD[$L$rol8] |
| paddd xmm3,xmm7 |
| paddd xmm2,xmm6 |
| paddd xmm1,xmm5 |
| paddd xmm0,xmm4 |
| pxor xmm15,xmm3 |
| pxor xmm14,xmm2 |
| pxor xmm13,xmm1 |
| pxor xmm12,xmm0 |
| DB 102,69,15,56,0,248 |
| DB 102,69,15,56,0,240 |
| DB 102,69,15,56,0,232 |
| DB 102,69,15,56,0,224 |
| movdqa xmm8,XMMWORD[((160+80))+rbp] |
| paddd xmm11,xmm15 |
| paddd xmm10,xmm14 |
| paddd xmm9,xmm13 |
| paddd xmm8,xmm12 |
| pxor xmm7,xmm11 |
| pxor xmm6,xmm10 |
| pxor xmm5,xmm9 |
| pxor xmm4,xmm8 |
| movdqa XMMWORD[(160+80)+rbp],xmm8 |
| movdqa xmm8,xmm7 |
| psrld xmm8,25 |
| pslld xmm7,32-25 |
| pxor xmm7,xmm8 |
| movdqa xmm8,xmm6 |
| psrld xmm8,25 |
| pslld xmm6,32-25 |
| pxor xmm6,xmm8 |
| movdqa xmm8,xmm5 |
| psrld xmm8,25 |
| pslld xmm5,32-25 |
| pxor xmm5,xmm8 |
| movdqa xmm8,xmm4 |
| psrld xmm8,25 |
| pslld xmm4,32-25 |
| pxor xmm4,xmm8 |
| movdqa xmm8,XMMWORD[((160+80))+rbp] |
| DB 102,15,58,15,255,12 |
| DB 102,69,15,58,15,219,8 |
| DB 102,69,15,58,15,255,4 |
| DB 102,15,58,15,246,12 |
| DB 102,69,15,58,15,210,8 |
| DB 102,69,15,58,15,246,4 |
| DB 102,15,58,15,237,12 |
| DB 102,69,15,58,15,201,8 |
| DB 102,69,15,58,15,237,4 |
| DB 102,15,58,15,228,12 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,4 |
| |
| lea rdi,[16+rdi] |
| dec r8 |
| jge NEAR $L$seal_sse_main_rounds |
| add r10,QWORD[((0+0))+rdi] |
| adc r11,QWORD[((8+0))+rdi] |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| lea rdi,[16+rdi] |
| dec rcx |
| jg NEAR $L$seal_sse_main_rounds |
| paddd xmm3,XMMWORD[$L$chacha20_consts] |
| paddd xmm7,XMMWORD[((160+48))+rbp] |
| paddd xmm11,XMMWORD[((160+64))+rbp] |
| paddd xmm15,XMMWORD[((160+144))+rbp] |
| paddd xmm2,XMMWORD[$L$chacha20_consts] |
| paddd xmm6,XMMWORD[((160+48))+rbp] |
| paddd xmm10,XMMWORD[((160+64))+rbp] |
| paddd xmm14,XMMWORD[((160+128))+rbp] |
| paddd xmm1,XMMWORD[$L$chacha20_consts] |
| paddd xmm5,XMMWORD[((160+48))+rbp] |
| paddd xmm9,XMMWORD[((160+64))+rbp] |
| paddd xmm13,XMMWORD[((160+112))+rbp] |
| paddd xmm0,XMMWORD[$L$chacha20_consts] |
| paddd xmm4,XMMWORD[((160+48))+rbp] |
| paddd xmm8,XMMWORD[((160+64))+rbp] |
| paddd xmm12,XMMWORD[((160+96))+rbp] |
| |
| movdqa XMMWORD[(160+80)+rbp],xmm14 |
| movdqa XMMWORD[(160+80)+rbp],xmm14 |
| movdqu xmm14,XMMWORD[((0 + 0))+rsi] |
| pxor xmm14,xmm3 |
| movdqu XMMWORD[(0 + 0)+rdi],xmm14 |
| movdqu xmm14,XMMWORD[((16 + 0))+rsi] |
| pxor xmm14,xmm7 |
| movdqu XMMWORD[(16 + 0)+rdi],xmm14 |
| movdqu xmm14,XMMWORD[((32 + 0))+rsi] |
| pxor xmm14,xmm11 |
| movdqu XMMWORD[(32 + 0)+rdi],xmm14 |
| movdqu xmm14,XMMWORD[((48 + 0))+rsi] |
| pxor xmm14,xmm15 |
| movdqu XMMWORD[(48 + 0)+rdi],xmm14 |
| |
| movdqa xmm14,XMMWORD[((160+80))+rbp] |
| movdqu xmm3,XMMWORD[((0 + 64))+rsi] |
| movdqu xmm7,XMMWORD[((16 + 64))+rsi] |
| movdqu xmm11,XMMWORD[((32 + 64))+rsi] |
| movdqu xmm15,XMMWORD[((48 + 64))+rsi] |
| pxor xmm2,xmm3 |
| pxor xmm6,xmm7 |
| pxor xmm10,xmm11 |
| pxor xmm15,xmm14 |
| movdqu XMMWORD[(0 + 64)+rdi],xmm2 |
| movdqu XMMWORD[(16 + 64)+rdi],xmm6 |
| movdqu XMMWORD[(32 + 64)+rdi],xmm10 |
| movdqu XMMWORD[(48 + 64)+rdi],xmm15 |
| movdqu xmm3,XMMWORD[((0 + 128))+rsi] |
| movdqu xmm7,XMMWORD[((16 + 128))+rsi] |
| movdqu xmm11,XMMWORD[((32 + 128))+rsi] |
| movdqu xmm15,XMMWORD[((48 + 128))+rsi] |
| pxor xmm1,xmm3 |
| pxor xmm5,xmm7 |
| pxor xmm9,xmm11 |
| pxor xmm15,xmm13 |
| movdqu XMMWORD[(0 + 128)+rdi],xmm1 |
| movdqu XMMWORD[(16 + 128)+rdi],xmm5 |
| movdqu XMMWORD[(32 + 128)+rdi],xmm9 |
| movdqu XMMWORD[(48 + 128)+rdi],xmm15 |
| |
| cmp rbx,16*16 |
| ja NEAR $L$seal_sse_main_loop_xor |
| |
| mov rcx,12*16 |
| sub rbx,12*16 |
| lea rsi,[192+rsi] |
| jmp NEAR $L$seal_sse_128_tail_hash |
| $L$seal_sse_main_loop_xor: |
| movdqu xmm3,XMMWORD[((0 + 192))+rsi] |
| movdqu xmm7,XMMWORD[((16 + 192))+rsi] |
| movdqu xmm11,XMMWORD[((32 + 192))+rsi] |
| movdqu xmm15,XMMWORD[((48 + 192))+rsi] |
| pxor xmm0,xmm3 |
| pxor xmm4,xmm7 |
| pxor xmm8,xmm11 |
| pxor xmm15,xmm12 |
| movdqu XMMWORD[(0 + 192)+rdi],xmm0 |
| movdqu XMMWORD[(16 + 192)+rdi],xmm4 |
| movdqu XMMWORD[(32 + 192)+rdi],xmm8 |
| movdqu XMMWORD[(48 + 192)+rdi],xmm15 |
| |
| lea rsi,[256+rsi] |
| sub rbx,16*16 |
| mov rcx,6 |
| mov r8,4 |
| cmp rbx,12*16 |
| jg NEAR $L$seal_sse_main_loop |
| mov rcx,rbx |
| test rbx,rbx |
| je NEAR $L$seal_sse_128_tail_hash |
| mov rcx,6 |
| cmp rbx,8*16 |
| ja NEAR $L$seal_sse_tail_192 |
| cmp rbx,4*16 |
| ja NEAR $L$seal_sse_tail_128 |
| |
| $L$seal_sse_tail_64: |
| movdqa xmm0,XMMWORD[$L$chacha20_consts] |
| movdqa xmm4,XMMWORD[((160+48))+rbp] |
| movdqa xmm8,XMMWORD[((160+64))+rbp] |
| movdqa xmm12,XMMWORD[((160+96))+rbp] |
| paddd xmm12,XMMWORD[$L$sse_inc] |
| movdqa XMMWORD[(160+96)+rbp],xmm12 |
| |
| $L$seal_sse_tail_64_rounds_and_x2hash: |
| add r10,QWORD[((0+0))+rdi] |
| adc r11,QWORD[((8+0))+rdi] |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| lea rdi,[16+rdi] |
| $L$seal_sse_tail_64_rounds_and_x1hash: |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol16] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,12 |
| psrld xmm4,20 |
| pxor xmm4,xmm3 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol8] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,7 |
| psrld xmm4,25 |
| pxor xmm4,xmm3 |
| DB 102,15,58,15,228,4 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,12 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol16] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,12 |
| psrld xmm4,20 |
| pxor xmm4,xmm3 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol8] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,7 |
| psrld xmm4,25 |
| pxor xmm4,xmm3 |
| DB 102,15,58,15,228,12 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,4 |
| add r10,QWORD[((0+0))+rdi] |
| adc r11,QWORD[((8+0))+rdi] |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| lea rdi,[16+rdi] |
| dec rcx |
| jg NEAR $L$seal_sse_tail_64_rounds_and_x2hash |
| dec r8 |
| jge NEAR $L$seal_sse_tail_64_rounds_and_x1hash |
| paddd xmm0,XMMWORD[$L$chacha20_consts] |
| paddd xmm4,XMMWORD[((160+48))+rbp] |
| paddd xmm8,XMMWORD[((160+64))+rbp] |
| paddd xmm12,XMMWORD[((160+96))+rbp] |
| |
| jmp NEAR $L$seal_sse_128_tail_xor |
| |
| $L$seal_sse_tail_128: |
| movdqa xmm0,XMMWORD[$L$chacha20_consts] |
| movdqa xmm4,XMMWORD[((160+48))+rbp] |
| movdqa xmm8,XMMWORD[((160+64))+rbp] |
| movdqa xmm1,xmm0 |
| movdqa xmm5,xmm4 |
| movdqa xmm9,xmm8 |
| movdqa xmm13,XMMWORD[((160+96))+rbp] |
| paddd xmm13,XMMWORD[$L$sse_inc] |
| movdqa xmm12,xmm13 |
| paddd xmm12,XMMWORD[$L$sse_inc] |
| movdqa XMMWORD[(160+96)+rbp],xmm12 |
| movdqa XMMWORD[(160+112)+rbp],xmm13 |
| |
| $L$seal_sse_tail_128_rounds_and_x2hash: |
| add r10,QWORD[((0+0))+rdi] |
| adc r11,QWORD[((8+0))+rdi] |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| lea rdi,[16+rdi] |
| $L$seal_sse_tail_128_rounds_and_x1hash: |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol16] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,12 |
| psrld xmm4,20 |
| pxor xmm4,xmm3 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol8] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,7 |
| psrld xmm4,25 |
| pxor xmm4,xmm3 |
| DB 102,15,58,15,228,4 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,12 |
| paddd xmm1,xmm5 |
| pxor xmm13,xmm1 |
| pshufb xmm13,XMMWORD[$L$rol16] |
| paddd xmm9,xmm13 |
| pxor xmm5,xmm9 |
| movdqa xmm3,xmm5 |
| pslld xmm3,12 |
| psrld xmm5,20 |
| pxor xmm5,xmm3 |
| paddd xmm1,xmm5 |
| pxor xmm13,xmm1 |
| pshufb xmm13,XMMWORD[$L$rol8] |
| paddd xmm9,xmm13 |
| pxor xmm5,xmm9 |
| movdqa xmm3,xmm5 |
| pslld xmm3,7 |
| psrld xmm5,25 |
| pxor xmm5,xmm3 |
| DB 102,15,58,15,237,4 |
| DB 102,69,15,58,15,201,8 |
| DB 102,69,15,58,15,237,12 |
| add r10,QWORD[((0+0))+rdi] |
| adc r11,QWORD[((8+0))+rdi] |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol16] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,12 |
| psrld xmm4,20 |
| pxor xmm4,xmm3 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol8] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,7 |
| psrld xmm4,25 |
| pxor xmm4,xmm3 |
| DB 102,15,58,15,228,12 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,4 |
| paddd xmm1,xmm5 |
| pxor xmm13,xmm1 |
| pshufb xmm13,XMMWORD[$L$rol16] |
| paddd xmm9,xmm13 |
| pxor xmm5,xmm9 |
| movdqa xmm3,xmm5 |
| pslld xmm3,12 |
| psrld xmm5,20 |
| pxor xmm5,xmm3 |
| paddd xmm1,xmm5 |
| pxor xmm13,xmm1 |
| pshufb xmm13,XMMWORD[$L$rol8] |
| paddd xmm9,xmm13 |
| pxor xmm5,xmm9 |
| movdqa xmm3,xmm5 |
| pslld xmm3,7 |
| psrld xmm5,25 |
| pxor xmm5,xmm3 |
| DB 102,15,58,15,237,12 |
| DB 102,69,15,58,15,201,8 |
| DB 102,69,15,58,15,237,4 |
| |
| lea rdi,[16+rdi] |
| dec rcx |
| jg NEAR $L$seal_sse_tail_128_rounds_and_x2hash |
| dec r8 |
| jge NEAR $L$seal_sse_tail_128_rounds_and_x1hash |
| paddd xmm1,XMMWORD[$L$chacha20_consts] |
| paddd xmm5,XMMWORD[((160+48))+rbp] |
| paddd xmm9,XMMWORD[((160+64))+rbp] |
| paddd xmm13,XMMWORD[((160+112))+rbp] |
| paddd xmm0,XMMWORD[$L$chacha20_consts] |
| paddd xmm4,XMMWORD[((160+48))+rbp] |
| paddd xmm8,XMMWORD[((160+64))+rbp] |
| paddd xmm12,XMMWORD[((160+96))+rbp] |
| movdqu xmm3,XMMWORD[((0 + 0))+rsi] |
| movdqu xmm7,XMMWORD[((16 + 0))+rsi] |
| movdqu xmm11,XMMWORD[((32 + 0))+rsi] |
| movdqu xmm15,XMMWORD[((48 + 0))+rsi] |
| pxor xmm1,xmm3 |
| pxor xmm5,xmm7 |
| pxor xmm9,xmm11 |
| pxor xmm15,xmm13 |
| movdqu XMMWORD[(0 + 0)+rdi],xmm1 |
| movdqu XMMWORD[(16 + 0)+rdi],xmm5 |
| movdqu XMMWORD[(32 + 0)+rdi],xmm9 |
| movdqu XMMWORD[(48 + 0)+rdi],xmm15 |
| |
| mov rcx,4*16 |
| sub rbx,4*16 |
| lea rsi,[64+rsi] |
| jmp NEAR $L$seal_sse_128_tail_hash |
| |
| $L$seal_sse_tail_192: |
| movdqa xmm0,XMMWORD[$L$chacha20_consts] |
| movdqa xmm4,XMMWORD[((160+48))+rbp] |
| movdqa xmm8,XMMWORD[((160+64))+rbp] |
| movdqa xmm1,xmm0 |
| movdqa xmm5,xmm4 |
| movdqa xmm9,xmm8 |
| movdqa xmm2,xmm0 |
| movdqa xmm6,xmm4 |
| movdqa xmm10,xmm8 |
| movdqa xmm14,XMMWORD[((160+96))+rbp] |
| paddd xmm14,XMMWORD[$L$sse_inc] |
| movdqa xmm13,xmm14 |
| paddd xmm13,XMMWORD[$L$sse_inc] |
| movdqa xmm12,xmm13 |
| paddd xmm12,XMMWORD[$L$sse_inc] |
| movdqa XMMWORD[(160+96)+rbp],xmm12 |
| movdqa XMMWORD[(160+112)+rbp],xmm13 |
| movdqa XMMWORD[(160+128)+rbp],xmm14 |
| |
| $L$seal_sse_tail_192_rounds_and_x2hash: |
| add r10,QWORD[((0+0))+rdi] |
| adc r11,QWORD[((8+0))+rdi] |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| lea rdi,[16+rdi] |
| $L$seal_sse_tail_192_rounds_and_x1hash: |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol16] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,12 |
| psrld xmm4,20 |
| pxor xmm4,xmm3 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol8] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,7 |
| psrld xmm4,25 |
| pxor xmm4,xmm3 |
| DB 102,15,58,15,228,4 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,12 |
| paddd xmm1,xmm5 |
| pxor xmm13,xmm1 |
| pshufb xmm13,XMMWORD[$L$rol16] |
| paddd xmm9,xmm13 |
| pxor xmm5,xmm9 |
| movdqa xmm3,xmm5 |
| pslld xmm3,12 |
| psrld xmm5,20 |
| pxor xmm5,xmm3 |
| paddd xmm1,xmm5 |
| pxor xmm13,xmm1 |
| pshufb xmm13,XMMWORD[$L$rol8] |
| paddd xmm9,xmm13 |
| pxor xmm5,xmm9 |
| movdqa xmm3,xmm5 |
| pslld xmm3,7 |
| psrld xmm5,25 |
| pxor xmm5,xmm3 |
| DB 102,15,58,15,237,4 |
| DB 102,69,15,58,15,201,8 |
| DB 102,69,15,58,15,237,12 |
| paddd xmm2,xmm6 |
| pxor xmm14,xmm2 |
| pshufb xmm14,XMMWORD[$L$rol16] |
| paddd xmm10,xmm14 |
| pxor xmm6,xmm10 |
| movdqa xmm3,xmm6 |
| pslld xmm3,12 |
| psrld xmm6,20 |
| pxor xmm6,xmm3 |
| paddd xmm2,xmm6 |
| pxor xmm14,xmm2 |
| pshufb xmm14,XMMWORD[$L$rol8] |
| paddd xmm10,xmm14 |
| pxor xmm6,xmm10 |
| movdqa xmm3,xmm6 |
| pslld xmm3,7 |
| psrld xmm6,25 |
| pxor xmm6,xmm3 |
| DB 102,15,58,15,246,4 |
| DB 102,69,15,58,15,210,8 |
| DB 102,69,15,58,15,246,12 |
| add r10,QWORD[((0+0))+rdi] |
| adc r11,QWORD[((8+0))+rdi] |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol16] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,12 |
| psrld xmm4,20 |
| pxor xmm4,xmm3 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol8] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,7 |
| psrld xmm4,25 |
| pxor xmm4,xmm3 |
| DB 102,15,58,15,228,12 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,4 |
| paddd xmm1,xmm5 |
| pxor xmm13,xmm1 |
| pshufb xmm13,XMMWORD[$L$rol16] |
| paddd xmm9,xmm13 |
| pxor xmm5,xmm9 |
| movdqa xmm3,xmm5 |
| pslld xmm3,12 |
| psrld xmm5,20 |
| pxor xmm5,xmm3 |
| paddd xmm1,xmm5 |
| pxor xmm13,xmm1 |
| pshufb xmm13,XMMWORD[$L$rol8] |
| paddd xmm9,xmm13 |
| pxor xmm5,xmm9 |
| movdqa xmm3,xmm5 |
| pslld xmm3,7 |
| psrld xmm5,25 |
| pxor xmm5,xmm3 |
| DB 102,15,58,15,237,12 |
| DB 102,69,15,58,15,201,8 |
| DB 102,69,15,58,15,237,4 |
| paddd xmm2,xmm6 |
| pxor xmm14,xmm2 |
| pshufb xmm14,XMMWORD[$L$rol16] |
| paddd xmm10,xmm14 |
| pxor xmm6,xmm10 |
| movdqa xmm3,xmm6 |
| pslld xmm3,12 |
| psrld xmm6,20 |
| pxor xmm6,xmm3 |
| paddd xmm2,xmm6 |
| pxor xmm14,xmm2 |
| pshufb xmm14,XMMWORD[$L$rol8] |
| paddd xmm10,xmm14 |
| pxor xmm6,xmm10 |
| movdqa xmm3,xmm6 |
| pslld xmm3,7 |
| psrld xmm6,25 |
| pxor xmm6,xmm3 |
| DB 102,15,58,15,246,12 |
| DB 102,69,15,58,15,210,8 |
| DB 102,69,15,58,15,246,4 |
| |
| lea rdi,[16+rdi] |
| dec rcx |
| jg NEAR $L$seal_sse_tail_192_rounds_and_x2hash |
| dec r8 |
| jge NEAR $L$seal_sse_tail_192_rounds_and_x1hash |
| paddd xmm2,XMMWORD[$L$chacha20_consts] |
| paddd xmm6,XMMWORD[((160+48))+rbp] |
| paddd xmm10,XMMWORD[((160+64))+rbp] |
| paddd xmm14,XMMWORD[((160+128))+rbp] |
| paddd xmm1,XMMWORD[$L$chacha20_consts] |
| paddd xmm5,XMMWORD[((160+48))+rbp] |
| paddd xmm9,XMMWORD[((160+64))+rbp] |
| paddd xmm13,XMMWORD[((160+112))+rbp] |
| paddd xmm0,XMMWORD[$L$chacha20_consts] |
| paddd xmm4,XMMWORD[((160+48))+rbp] |
| paddd xmm8,XMMWORD[((160+64))+rbp] |
| paddd xmm12,XMMWORD[((160+96))+rbp] |
| movdqu xmm3,XMMWORD[((0 + 0))+rsi] |
| movdqu xmm7,XMMWORD[((16 + 0))+rsi] |
| movdqu xmm11,XMMWORD[((32 + 0))+rsi] |
| movdqu xmm15,XMMWORD[((48 + 0))+rsi] |
| pxor xmm2,xmm3 |
| pxor xmm6,xmm7 |
| pxor xmm10,xmm11 |
| pxor xmm15,xmm14 |
| movdqu XMMWORD[(0 + 0)+rdi],xmm2 |
| movdqu XMMWORD[(16 + 0)+rdi],xmm6 |
| movdqu XMMWORD[(32 + 0)+rdi],xmm10 |
| movdqu XMMWORD[(48 + 0)+rdi],xmm15 |
| movdqu xmm3,XMMWORD[((0 + 64))+rsi] |
| movdqu xmm7,XMMWORD[((16 + 64))+rsi] |
| movdqu xmm11,XMMWORD[((32 + 64))+rsi] |
| movdqu xmm15,XMMWORD[((48 + 64))+rsi] |
| pxor xmm1,xmm3 |
| pxor xmm5,xmm7 |
| pxor xmm9,xmm11 |
| pxor xmm15,xmm13 |
| movdqu XMMWORD[(0 + 64)+rdi],xmm1 |
| movdqu XMMWORD[(16 + 64)+rdi],xmm5 |
| movdqu XMMWORD[(32 + 64)+rdi],xmm9 |
| movdqu XMMWORD[(48 + 64)+rdi],xmm15 |
| |
| mov rcx,8*16 |
| sub rbx,8*16 |
| lea rsi,[128+rsi] |
| |
| $L$seal_sse_128_tail_hash: |
| cmp rcx,16 |
| jb NEAR $L$seal_sse_128_tail_xor |
| add r10,QWORD[((0+0))+rdi] |
| adc r11,QWORD[((8+0))+rdi] |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| sub rcx,16 |
| lea rdi,[16+rdi] |
| jmp NEAR $L$seal_sse_128_tail_hash |
| |
| $L$seal_sse_128_tail_xor: |
| cmp rbx,16 |
| jb NEAR $L$seal_sse_tail_16 |
| sub rbx,16 |
| |
| movdqu xmm3,XMMWORD[rsi] |
| pxor xmm0,xmm3 |
| movdqu XMMWORD[rdi],xmm0 |
| |
| add r10,QWORD[rdi] |
| adc r11,QWORD[8+rdi] |
| adc r12,1 |
| lea rsi,[16+rsi] |
| lea rdi,[16+rdi] |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| |
| movdqa xmm0,xmm4 |
| movdqa xmm4,xmm8 |
| movdqa xmm8,xmm12 |
| movdqa xmm12,xmm1 |
| movdqa xmm1,xmm5 |
| movdqa xmm5,xmm9 |
| movdqa xmm9,xmm13 |
| jmp NEAR $L$seal_sse_128_tail_xor |
| |
| $L$seal_sse_tail_16: |
| test rbx,rbx |
| jz NEAR $L$process_blocks_of_extra_in |
| |
| mov r8,rbx |
| mov rcx,rbx |
| lea rsi,[((-1))+rbx*1+rsi] |
| pxor xmm15,xmm15 |
| $L$seal_sse_tail_16_compose: |
| pslldq xmm15,1 |
| pinsrb xmm15,BYTE[rsi],0 |
| lea rsi,[((-1))+rsi] |
| dec rcx |
| jne NEAR $L$seal_sse_tail_16_compose |
| |
| |
| pxor xmm15,xmm0 |
| |
| |
| mov rcx,rbx |
| movdqu xmm0,xmm15 |
| $L$seal_sse_tail_16_extract: |
| pextrb XMMWORD[rdi],xmm0,0 |
| psrldq xmm0,1 |
| add rdi,1 |
| sub rcx,1 |
| jnz NEAR $L$seal_sse_tail_16_extract |
| |
| |
| |
| |
| |
| |
| |
| |
| mov r9,QWORD[((288 + 160 + 32))+rsp] |
| mov r14,QWORD[56+r9] |
| mov r13,QWORD[48+r9] |
| test r14,r14 |
| jz NEAR $L$process_partial_block |
| |
| mov r15,16 |
| sub r15,rbx |
| cmp r14,r15 |
| |
| jge NEAR $L$load_extra_in |
| mov r15,r14 |
| |
| $L$load_extra_in: |
| |
| |
| lea rsi,[((-1))+r15*1+r13] |
| |
| |
| add r13,r15 |
| sub r14,r15 |
| mov QWORD[48+r9],r13 |
| mov QWORD[56+r9],r14 |
| |
| |
| |
| add r8,r15 |
| |
| |
| pxor xmm11,xmm11 |
| $L$load_extra_load_loop: |
| pslldq xmm11,1 |
| pinsrb xmm11,BYTE[rsi],0 |
| lea rsi,[((-1))+rsi] |
| sub r15,1 |
| jnz NEAR $L$load_extra_load_loop |
| |
| |
| |
| |
| mov r15,rbx |
| |
| $L$load_extra_shift_loop: |
| pslldq xmm11,1 |
| sub r15,1 |
| jnz NEAR $L$load_extra_shift_loop |
| |
| |
| |
| |
| lea r15,[$L$and_masks] |
| shl rbx,4 |
| pand xmm15,XMMWORD[((-16))+rbx*1+r15] |
| |
| |
| por xmm15,xmm11 |
| |
| |
| |
| DB 102,77,15,126,253 |
| pextrq r14,xmm15,1 |
| add r10,r13 |
| adc r11,r14 |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| |
| $L$process_blocks_of_extra_in: |
| |
| mov r9,QWORD[((288+32+160 ))+rsp] |
| mov rsi,QWORD[48+r9] |
| mov r8,QWORD[56+r9] |
| mov rcx,r8 |
| shr r8,4 |
| |
| $L$process_extra_hash_loop: |
| jz NEAR process_extra_in_trailer |
| add r10,QWORD[((0+0))+rsi] |
| adc r11,QWORD[((8+0))+rsi] |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| lea rsi,[16+rsi] |
| sub r8,1 |
| jmp NEAR $L$process_extra_hash_loop |
| process_extra_in_trailer: |
| and rcx,15 |
| mov rbx,rcx |
| jz NEAR $L$do_length_block |
| lea rsi,[((-1))+rcx*1+rsi] |
| |
| $L$process_extra_in_trailer_load: |
| pslldq xmm15,1 |
| pinsrb xmm15,BYTE[rsi],0 |
| lea rsi,[((-1))+rsi] |
| sub rcx,1 |
| jnz NEAR $L$process_extra_in_trailer_load |
| |
| $L$process_partial_block: |
| |
| lea r15,[$L$and_masks] |
| shl rbx,4 |
| pand xmm15,XMMWORD[((-16))+rbx*1+r15] |
| DB 102,77,15,126,253 |
| pextrq r14,xmm15,1 |
| add r10,r13 |
| adc r11,r14 |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| |
| $L$do_length_block: |
| add r10,QWORD[((0+160+32))+rbp] |
| adc r11,QWORD[((8+160+32))+rbp] |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| |
| mov r13,r10 |
| mov r14,r11 |
| mov r15,r12 |
| sub r10,-5 |
| sbb r11,-1 |
| sbb r12,3 |
| cmovc r10,r13 |
| cmovc r11,r14 |
| cmovc r12,r15 |
| |
| add r10,QWORD[((0+160+16))+rbp] |
| adc r11,QWORD[((8+160+16))+rbp] |
| |
| movaps xmm6,XMMWORD[((0+0))+rbp] |
| movaps xmm7,XMMWORD[((16+0))+rbp] |
| movaps xmm8,XMMWORD[((32+0))+rbp] |
| movaps xmm9,XMMWORD[((48+0))+rbp] |
| movaps xmm10,XMMWORD[((64+0))+rbp] |
| movaps xmm11,XMMWORD[((80+0))+rbp] |
| movaps xmm12,XMMWORD[((96+0))+rbp] |
| movaps xmm13,XMMWORD[((112+0))+rbp] |
| movaps xmm14,XMMWORD[((128+0))+rbp] |
| movaps xmm15,XMMWORD[((144+0))+rbp] |
| |
| |
| add rsp,288 + 160 + 32 |
| |
| |
| pop r9 |
| |
| mov QWORD[r9],r10 |
| mov QWORD[8+r9],r11 |
| pop r15 |
| |
| pop r14 |
| |
| pop r13 |
| |
| pop r12 |
| |
| pop rbx |
| |
| pop rbp |
| |
| mov rdi,QWORD[8+rsp] ;WIN64 epilogue |
| mov rsi,QWORD[16+rsp] |
| ret |
| |
| $L$seal_sse_128: |
| |
| movdqu xmm0,XMMWORD[$L$chacha20_consts] |
| movdqa xmm1,xmm0 |
| movdqa xmm2,xmm0 |
| movdqu xmm4,XMMWORD[r9] |
| movdqa xmm5,xmm4 |
| movdqa xmm6,xmm4 |
| movdqu xmm8,XMMWORD[16+r9] |
| movdqa xmm9,xmm8 |
| movdqa xmm10,xmm8 |
| movdqu xmm14,XMMWORD[32+r9] |
| movdqa xmm12,xmm14 |
| paddd xmm12,XMMWORD[$L$sse_inc] |
| movdqa xmm13,xmm12 |
| paddd xmm13,XMMWORD[$L$sse_inc] |
| movdqa xmm7,xmm4 |
| movdqa xmm11,xmm8 |
| movdqa xmm15,xmm12 |
| mov r10,10 |
| |
| $L$seal_sse_128_rounds: |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol16] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,12 |
| psrld xmm4,20 |
| pxor xmm4,xmm3 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol8] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,7 |
| psrld xmm4,25 |
| pxor xmm4,xmm3 |
| DB 102,15,58,15,228,4 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,12 |
| paddd xmm1,xmm5 |
| pxor xmm13,xmm1 |
| pshufb xmm13,XMMWORD[$L$rol16] |
| paddd xmm9,xmm13 |
| pxor xmm5,xmm9 |
| movdqa xmm3,xmm5 |
| pslld xmm3,12 |
| psrld xmm5,20 |
| pxor xmm5,xmm3 |
| paddd xmm1,xmm5 |
| pxor xmm13,xmm1 |
| pshufb xmm13,XMMWORD[$L$rol8] |
| paddd xmm9,xmm13 |
| pxor xmm5,xmm9 |
| movdqa xmm3,xmm5 |
| pslld xmm3,7 |
| psrld xmm5,25 |
| pxor xmm5,xmm3 |
| DB 102,15,58,15,237,4 |
| DB 102,69,15,58,15,201,8 |
| DB 102,69,15,58,15,237,12 |
| paddd xmm2,xmm6 |
| pxor xmm14,xmm2 |
| pshufb xmm14,XMMWORD[$L$rol16] |
| paddd xmm10,xmm14 |
| pxor xmm6,xmm10 |
| movdqa xmm3,xmm6 |
| pslld xmm3,12 |
| psrld xmm6,20 |
| pxor xmm6,xmm3 |
| paddd xmm2,xmm6 |
| pxor xmm14,xmm2 |
| pshufb xmm14,XMMWORD[$L$rol8] |
| paddd xmm10,xmm14 |
| pxor xmm6,xmm10 |
| movdqa xmm3,xmm6 |
| pslld xmm3,7 |
| psrld xmm6,25 |
| pxor xmm6,xmm3 |
| DB 102,15,58,15,246,4 |
| DB 102,69,15,58,15,210,8 |
| DB 102,69,15,58,15,246,12 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol16] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,12 |
| psrld xmm4,20 |
| pxor xmm4,xmm3 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol8] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,7 |
| psrld xmm4,25 |
| pxor xmm4,xmm3 |
| DB 102,15,58,15,228,12 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,4 |
| paddd xmm1,xmm5 |
| pxor xmm13,xmm1 |
| pshufb xmm13,XMMWORD[$L$rol16] |
| paddd xmm9,xmm13 |
| pxor xmm5,xmm9 |
| movdqa xmm3,xmm5 |
| pslld xmm3,12 |
| psrld xmm5,20 |
| pxor xmm5,xmm3 |
| paddd xmm1,xmm5 |
| pxor xmm13,xmm1 |
| pshufb xmm13,XMMWORD[$L$rol8] |
| paddd xmm9,xmm13 |
| pxor xmm5,xmm9 |
| movdqa xmm3,xmm5 |
| pslld xmm3,7 |
| psrld xmm5,25 |
| pxor xmm5,xmm3 |
| DB 102,15,58,15,237,12 |
| DB 102,69,15,58,15,201,8 |
| DB 102,69,15,58,15,237,4 |
| paddd xmm2,xmm6 |
| pxor xmm14,xmm2 |
| pshufb xmm14,XMMWORD[$L$rol16] |
| paddd xmm10,xmm14 |
| pxor xmm6,xmm10 |
| movdqa xmm3,xmm6 |
| pslld xmm3,12 |
| psrld xmm6,20 |
| pxor xmm6,xmm3 |
| paddd xmm2,xmm6 |
| pxor xmm14,xmm2 |
| pshufb xmm14,XMMWORD[$L$rol8] |
| paddd xmm10,xmm14 |
| pxor xmm6,xmm10 |
| movdqa xmm3,xmm6 |
| pslld xmm3,7 |
| psrld xmm6,25 |
| pxor xmm6,xmm3 |
| DB 102,15,58,15,246,12 |
| DB 102,69,15,58,15,210,8 |
| DB 102,69,15,58,15,246,4 |
| |
| dec r10 |
| jnz NEAR $L$seal_sse_128_rounds |
| paddd xmm0,XMMWORD[$L$chacha20_consts] |
| paddd xmm1,XMMWORD[$L$chacha20_consts] |
| paddd xmm2,XMMWORD[$L$chacha20_consts] |
| paddd xmm4,xmm7 |
| paddd xmm5,xmm7 |
| paddd xmm6,xmm7 |
| paddd xmm8,xmm11 |
| paddd xmm9,xmm11 |
| paddd xmm12,xmm15 |
| paddd xmm15,XMMWORD[$L$sse_inc] |
| paddd xmm13,xmm15 |
| |
| pand xmm2,XMMWORD[$L$clamp] |
| movdqa XMMWORD[(160+0)+rbp],xmm2 |
| movdqa XMMWORD[(160+16)+rbp],xmm6 |
| |
| mov r8,r8 |
| call poly_hash_ad_internal |
| jmp NEAR $L$seal_sse_128_tail_xor |
| $L$SEH_end_chacha20_poly1305_seal_nohw: |
| |
| |
| |
| global chacha20_poly1305_open_avx2 |
| |
| ALIGN 64 |
| chacha20_poly1305_open_avx2: |
| mov QWORD[8+rsp],rdi ;WIN64 prologue |
| mov QWORD[16+rsp],rsi |
| mov rax,rsp |
| $L$SEH_begin_chacha20_poly1305_open_avx2: |
| mov rdi,rcx |
| mov rsi,rdx |
| mov rdx,r8 |
| mov rcx,r9 |
| mov r8,QWORD[40+rsp] |
| mov r9,QWORD[48+rsp] |
| |
| |
| |
| _CET_ENDBR |
| push rbp |
| |
| push rbx |
| |
| push r12 |
| |
| push r13 |
| |
| push r14 |
| |
| push r15 |
| |
| |
| |
| push r9 |
| |
| sub rsp,288 + 160 + 32 |
| |
| |
| lea rbp,[32+rsp] |
| and rbp,-32 |
| |
| movaps XMMWORD[(0+0)+rbp],xmm6 |
| movaps XMMWORD[(16+0)+rbp],xmm7 |
| movaps XMMWORD[(32+0)+rbp],xmm8 |
| movaps XMMWORD[(48+0)+rbp],xmm9 |
| movaps XMMWORD[(64+0)+rbp],xmm10 |
| movaps XMMWORD[(80+0)+rbp],xmm11 |
| movaps XMMWORD[(96+0)+rbp],xmm12 |
| movaps XMMWORD[(112+0)+rbp],xmm13 |
| movaps XMMWORD[(128+0)+rbp],xmm14 |
| movaps XMMWORD[(144+0)+rbp],xmm15 |
| |
| mov rbx,rdx |
| mov QWORD[((0+160+32))+rbp],r8 |
| mov QWORD[((8+160+32))+rbp],rbx |
| |
| vzeroupper |
| vmovdqa ymm0,YMMWORD[$L$chacha20_consts] |
| vbroadcasti128 ymm4,XMMWORD[r9] |
| vbroadcasti128 ymm8,XMMWORD[16+r9] |
| vbroadcasti128 ymm12,XMMWORD[32+r9] |
| vpaddd ymm12,ymm12,YMMWORD[$L$avx2_init] |
| cmp rbx,6*32 |
| jbe NEAR $L$open_avx2_192 |
| cmp rbx,10*32 |
| jbe NEAR $L$open_avx2_320 |
| |
| vmovdqa YMMWORD[(160+64)+rbp],ymm4 |
| vmovdqa YMMWORD[(160+96)+rbp],ymm8 |
| vmovdqa YMMWORD[(160+160)+rbp],ymm12 |
| mov r10,10 |
| $L$open_avx2_init_rounds: |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm12,ymm12,YMMWORD[$L$rol16] |
| vpaddd ymm8,ymm8,ymm12 |
| vpxor ymm4,ymm4,ymm8 |
| vpsrld ymm3,ymm4,20 |
| vpslld ymm4,ymm4,12 |
| vpxor ymm4,ymm4,ymm3 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm12,ymm12,YMMWORD[$L$rol8] |
| vpaddd ymm8,ymm8,ymm12 |
| vpxor ymm4,ymm4,ymm8 |
| vpslld ymm3,ymm4,7 |
| vpsrld ymm4,ymm4,25 |
| vpxor ymm4,ymm4,ymm3 |
| vpalignr ymm12,ymm12,ymm12,12 |
| vpalignr ymm8,ymm8,ymm8,8 |
| vpalignr ymm4,ymm4,ymm4,4 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm12,ymm12,YMMWORD[$L$rol16] |
| vpaddd ymm8,ymm8,ymm12 |
| vpxor ymm4,ymm4,ymm8 |
| vpsrld ymm3,ymm4,20 |
| vpslld ymm4,ymm4,12 |
| vpxor ymm4,ymm4,ymm3 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm12,ymm12,YMMWORD[$L$rol8] |
| vpaddd ymm8,ymm8,ymm12 |
| vpxor ymm4,ymm4,ymm8 |
| vpslld ymm3,ymm4,7 |
| vpsrld ymm4,ymm4,25 |
| vpxor ymm4,ymm4,ymm3 |
| vpalignr ymm12,ymm12,ymm12,4 |
| vpalignr ymm8,ymm8,ymm8,8 |
| vpalignr ymm4,ymm4,ymm4,12 |
| |
| dec r10 |
| jne NEAR $L$open_avx2_init_rounds |
| vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] |
| vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] |
| vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] |
| vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] |
| |
| vperm2i128 ymm3,ymm4,ymm0,0x02 |
| |
| vpand ymm3,ymm3,YMMWORD[$L$clamp] |
| vmovdqa YMMWORD[(160+0)+rbp],ymm3 |
| |
| vperm2i128 ymm0,ymm4,ymm0,0x13 |
| vperm2i128 ymm4,ymm12,ymm8,0x13 |
| |
| mov r8,r8 |
| call poly_hash_ad_internal |
| |
| xor rcx,rcx |
| $L$open_avx2_init_hash: |
| add r10,QWORD[((0+0))+rcx*1+rsi] |
| adc r11,QWORD[((8+0))+rcx*1+rsi] |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| add rcx,16 |
| cmp rcx,2*32 |
| jne NEAR $L$open_avx2_init_hash |
| |
| vpxor ymm0,ymm0,YMMWORD[rsi] |
| vpxor ymm4,ymm4,YMMWORD[32+rsi] |
| |
| vmovdqu YMMWORD[rdi],ymm0 |
| vmovdqu YMMWORD[32+rdi],ymm4 |
| lea rsi,[64+rsi] |
| lea rdi,[64+rdi] |
| sub rbx,2*32 |
| $L$open_avx2_main_loop: |
| |
| cmp rbx,16*32 |
| jb NEAR $L$open_avx2_main_loop_done |
| vmovdqa ymm0,YMMWORD[$L$chacha20_consts] |
| vmovdqa ymm4,YMMWORD[((160+64))+rbp] |
| vmovdqa ymm8,YMMWORD[((160+96))+rbp] |
| vmovdqa ymm1,ymm0 |
| vmovdqa ymm5,ymm4 |
| vmovdqa ymm9,ymm8 |
| vmovdqa ymm2,ymm0 |
| vmovdqa ymm6,ymm4 |
| vmovdqa ymm10,ymm8 |
| vmovdqa ymm3,ymm0 |
| vmovdqa ymm7,ymm4 |
| vmovdqa ymm11,ymm8 |
| vmovdqa ymm12,YMMWORD[$L$avx2_inc] |
| vpaddd ymm15,ymm12,YMMWORD[((160+160))+rbp] |
| vpaddd ymm14,ymm12,ymm15 |
| vpaddd ymm13,ymm12,ymm14 |
| vpaddd ymm12,ymm12,ymm13 |
| vmovdqa YMMWORD[(160+256)+rbp],ymm15 |
| vmovdqa YMMWORD[(160+224)+rbp],ymm14 |
| vmovdqa YMMWORD[(160+192)+rbp],ymm13 |
| vmovdqa YMMWORD[(160+160)+rbp],ymm12 |
| |
| xor rcx,rcx |
| $L$open_avx2_main_loop_rounds: |
| add r10,QWORD[((0+0))+rcx*1+rsi] |
| adc r11,QWORD[((8+0))+rcx*1+rsi] |
| adc r12,1 |
| vmovdqa YMMWORD[(160+128)+rbp],ymm8 |
| vmovdqa ymm8,YMMWORD[$L$rol16] |
| vpaddd ymm3,ymm3,ymm7 |
| vpaddd ymm2,ymm2,ymm6 |
| vpaddd ymm1,ymm1,ymm5 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm15,ymm15,ymm3 |
| vpxor ymm14,ymm14,ymm2 |
| vpxor ymm13,ymm13,ymm1 |
| vpxor ymm12,ymm12,ymm0 |
| mov rdx,QWORD[((0+160+0))+rbp] |
| mov r15,rdx |
| mulx r14,r13,r10 |
| mulx rdx,rax,r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| vpshufb ymm15,ymm15,ymm8 |
| vpshufb ymm14,ymm14,ymm8 |
| vpshufb ymm13,ymm13,ymm8 |
| vpshufb ymm12,ymm12,ymm8 |
| vpaddd ymm11,ymm11,ymm15 |
| vpaddd ymm10,ymm10,ymm14 |
| vpaddd ymm9,ymm9,ymm13 |
| vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] |
| vpxor ymm7,ymm7,ymm11 |
| mov rdx,QWORD[((8+160+0))+rbp] |
| mulx rax,r10,r10 |
| add r14,r10 |
| mulx r9,r11,r11 |
| adc r15,r11 |
| adc r9,0 |
| imul rdx,r12 |
| vpxor ymm6,ymm6,ymm10 |
| vpxor ymm5,ymm5,ymm9 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa YMMWORD[(160+128)+rbp],ymm8 |
| vpsrld ymm8,ymm7,20 |
| vpslld ymm7,ymm7,32-20 |
| vpxor ymm7,ymm7,ymm8 |
| vpsrld ymm8,ymm6,20 |
| vpslld ymm6,ymm6,32-20 |
| vpxor ymm6,ymm6,ymm8 |
| vpsrld ymm8,ymm5,20 |
| vpslld ymm5,ymm5,32-20 |
| add r15,rax |
| adc r9,rdx |
| vpxor ymm5,ymm5,ymm8 |
| vpsrld ymm8,ymm4,20 |
| vpslld ymm4,ymm4,32-20 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa ymm8,YMMWORD[$L$rol8] |
| vpaddd ymm3,ymm3,ymm7 |
| vpaddd ymm2,ymm2,ymm6 |
| vpaddd ymm1,ymm1,ymm5 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm15,ymm15,ymm3 |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| vpxor ymm14,ymm14,ymm2 |
| vpxor ymm13,ymm13,ymm1 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm15,ymm15,ymm8 |
| vpshufb ymm14,ymm14,ymm8 |
| vpshufb ymm13,ymm13,ymm8 |
| vpshufb ymm12,ymm12,ymm8 |
| vpaddd ymm11,ymm11,ymm15 |
| vpaddd ymm10,ymm10,ymm14 |
| add r10,QWORD[((0+16))+rcx*1+rsi] |
| adc r11,QWORD[((8+16))+rcx*1+rsi] |
| adc r12,1 |
| vpaddd ymm9,ymm9,ymm13 |
| vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] |
| vpxor ymm7,ymm7,ymm11 |
| vpxor ymm6,ymm6,ymm10 |
| vpxor ymm5,ymm5,ymm9 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa YMMWORD[(160+128)+rbp],ymm8 |
| vpsrld ymm8,ymm7,25 |
| mov rdx,QWORD[((0+160+0))+rbp] |
| mov r15,rdx |
| mulx r14,r13,r10 |
| mulx rdx,rax,r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| vpslld ymm7,ymm7,32-25 |
| vpxor ymm7,ymm7,ymm8 |
| vpsrld ymm8,ymm6,25 |
| vpslld ymm6,ymm6,32-25 |
| vpxor ymm6,ymm6,ymm8 |
| vpsrld ymm8,ymm5,25 |
| vpslld ymm5,ymm5,32-25 |
| vpxor ymm5,ymm5,ymm8 |
| vpsrld ymm8,ymm4,25 |
| vpslld ymm4,ymm4,32-25 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa ymm8,YMMWORD[((160+128))+rbp] |
| vpalignr ymm7,ymm7,ymm7,4 |
| vpalignr ymm11,ymm11,ymm11,8 |
| vpalignr ymm15,ymm15,ymm15,12 |
| vpalignr ymm6,ymm6,ymm6,4 |
| vpalignr ymm10,ymm10,ymm10,8 |
| vpalignr ymm14,ymm14,ymm14,12 |
| mov rdx,QWORD[((8+160+0))+rbp] |
| mulx rax,r10,r10 |
| add r14,r10 |
| mulx r9,r11,r11 |
| adc r15,r11 |
| adc r9,0 |
| imul rdx,r12 |
| vpalignr ymm5,ymm5,ymm5,4 |
| vpalignr ymm9,ymm9,ymm9,8 |
| vpalignr ymm13,ymm13,ymm13,12 |
| vpalignr ymm4,ymm4,ymm4,4 |
| vpalignr ymm8,ymm8,ymm8,8 |
| vpalignr ymm12,ymm12,ymm12,12 |
| vmovdqa YMMWORD[(160+128)+rbp],ymm8 |
| vmovdqa ymm8,YMMWORD[$L$rol16] |
| vpaddd ymm3,ymm3,ymm7 |
| vpaddd ymm2,ymm2,ymm6 |
| vpaddd ymm1,ymm1,ymm5 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm15,ymm15,ymm3 |
| vpxor ymm14,ymm14,ymm2 |
| vpxor ymm13,ymm13,ymm1 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm15,ymm15,ymm8 |
| vpshufb ymm14,ymm14,ymm8 |
| add r15,rax |
| adc r9,rdx |
| vpshufb ymm13,ymm13,ymm8 |
| vpshufb ymm12,ymm12,ymm8 |
| vpaddd ymm11,ymm11,ymm15 |
| vpaddd ymm10,ymm10,ymm14 |
| vpaddd ymm9,ymm9,ymm13 |
| vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] |
| vpxor ymm7,ymm7,ymm11 |
| vpxor ymm6,ymm6,ymm10 |
| vpxor ymm5,ymm5,ymm9 |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa YMMWORD[(160+128)+rbp],ymm8 |
| vpsrld ymm8,ymm7,20 |
| vpslld ymm7,ymm7,32-20 |
| vpxor ymm7,ymm7,ymm8 |
| vpsrld ymm8,ymm6,20 |
| vpslld ymm6,ymm6,32-20 |
| vpxor ymm6,ymm6,ymm8 |
| add r10,QWORD[((0+32))+rcx*1+rsi] |
| adc r11,QWORD[((8+32))+rcx*1+rsi] |
| adc r12,1 |
| |
| lea rcx,[48+rcx] |
| vpsrld ymm8,ymm5,20 |
| vpslld ymm5,ymm5,32-20 |
| vpxor ymm5,ymm5,ymm8 |
| vpsrld ymm8,ymm4,20 |
| vpslld ymm4,ymm4,32-20 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa ymm8,YMMWORD[$L$rol8] |
| vpaddd ymm3,ymm3,ymm7 |
| vpaddd ymm2,ymm2,ymm6 |
| vpaddd ymm1,ymm1,ymm5 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm15,ymm15,ymm3 |
| vpxor ymm14,ymm14,ymm2 |
| vpxor ymm13,ymm13,ymm1 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm15,ymm15,ymm8 |
| vpshufb ymm14,ymm14,ymm8 |
| vpshufb ymm13,ymm13,ymm8 |
| mov rdx,QWORD[((0+160+0))+rbp] |
| mov r15,rdx |
| mulx r14,r13,r10 |
| mulx rdx,rax,r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| vpshufb ymm12,ymm12,ymm8 |
| vpaddd ymm11,ymm11,ymm15 |
| vpaddd ymm10,ymm10,ymm14 |
| vpaddd ymm9,ymm9,ymm13 |
| vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] |
| vpxor ymm7,ymm7,ymm11 |
| vpxor ymm6,ymm6,ymm10 |
| vpxor ymm5,ymm5,ymm9 |
| mov rdx,QWORD[((8+160+0))+rbp] |
| mulx rax,r10,r10 |
| add r14,r10 |
| mulx r9,r11,r11 |
| adc r15,r11 |
| adc r9,0 |
| imul rdx,r12 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa YMMWORD[(160+128)+rbp],ymm8 |
| vpsrld ymm8,ymm7,25 |
| vpslld ymm7,ymm7,32-25 |
| vpxor ymm7,ymm7,ymm8 |
| vpsrld ymm8,ymm6,25 |
| vpslld ymm6,ymm6,32-25 |
| vpxor ymm6,ymm6,ymm8 |
| add r15,rax |
| adc r9,rdx |
| vpsrld ymm8,ymm5,25 |
| vpslld ymm5,ymm5,32-25 |
| vpxor ymm5,ymm5,ymm8 |
| vpsrld ymm8,ymm4,25 |
| vpslld ymm4,ymm4,32-25 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa ymm8,YMMWORD[((160+128))+rbp] |
| vpalignr ymm7,ymm7,ymm7,12 |
| vpalignr ymm11,ymm11,ymm11,8 |
| vpalignr ymm15,ymm15,ymm15,4 |
| vpalignr ymm6,ymm6,ymm6,12 |
| vpalignr ymm10,ymm10,ymm10,8 |
| vpalignr ymm14,ymm14,ymm14,4 |
| vpalignr ymm5,ymm5,ymm5,12 |
| vpalignr ymm9,ymm9,ymm9,8 |
| vpalignr ymm13,ymm13,ymm13,4 |
| vpalignr ymm4,ymm4,ymm4,12 |
| vpalignr ymm8,ymm8,ymm8,8 |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| vpalignr ymm12,ymm12,ymm12,4 |
| |
| cmp rcx,10*6*8 |
| jne NEAR $L$open_avx2_main_loop_rounds |
| vpaddd ymm3,ymm3,YMMWORD[$L$chacha20_consts] |
| vpaddd ymm7,ymm7,YMMWORD[((160+64))+rbp] |
| vpaddd ymm11,ymm11,YMMWORD[((160+96))+rbp] |
| vpaddd ymm15,ymm15,YMMWORD[((160+256))+rbp] |
| vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] |
| vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp] |
| vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp] |
| vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] |
| vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] |
| vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] |
| vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] |
| vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] |
| vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] |
| vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] |
| vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] |
| vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] |
| |
| vmovdqa YMMWORD[(160+128)+rbp],ymm0 |
| add r10,QWORD[((0+480))+rsi] |
| adc r11,QWORD[((8+480))+rsi] |
| adc r12,1 |
| vperm2i128 ymm0,ymm7,ymm3,0x02 |
| vperm2i128 ymm7,ymm7,ymm3,0x13 |
| vperm2i128 ymm3,ymm15,ymm11,0x02 |
| vperm2i128 ymm11,ymm15,ymm11,0x13 |
| vpxor ymm0,ymm0,YMMWORD[((0+0))+rsi] |
| vpxor ymm3,ymm3,YMMWORD[((32+0))+rsi] |
| vpxor ymm7,ymm7,YMMWORD[((64+0))+rsi] |
| vpxor ymm11,ymm11,YMMWORD[((96+0))+rsi] |
| vmovdqu YMMWORD[(0+0)+rdi],ymm0 |
| vmovdqu YMMWORD[(32+0)+rdi],ymm3 |
| vmovdqu YMMWORD[(64+0)+rdi],ymm7 |
| vmovdqu YMMWORD[(96+0)+rdi],ymm11 |
| |
| vmovdqa ymm0,YMMWORD[((160+128))+rbp] |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| vperm2i128 ymm3,ymm6,ymm2,0x02 |
| vperm2i128 ymm6,ymm6,ymm2,0x13 |
| vperm2i128 ymm2,ymm14,ymm10,0x02 |
| vperm2i128 ymm10,ymm14,ymm10,0x13 |
| vpxor ymm3,ymm3,YMMWORD[((0+128))+rsi] |
| vpxor ymm2,ymm2,YMMWORD[((32+128))+rsi] |
| vpxor ymm6,ymm6,YMMWORD[((64+128))+rsi] |
| vpxor ymm10,ymm10,YMMWORD[((96+128))+rsi] |
| vmovdqu YMMWORD[(0+128)+rdi],ymm3 |
| vmovdqu YMMWORD[(32+128)+rdi],ymm2 |
| vmovdqu YMMWORD[(64+128)+rdi],ymm6 |
| vmovdqu YMMWORD[(96+128)+rdi],ymm10 |
| add r10,QWORD[((0+480+16))+rsi] |
| adc r11,QWORD[((8+480+16))+rsi] |
| adc r12,1 |
| vperm2i128 ymm3,ymm5,ymm1,0x02 |
| vperm2i128 ymm5,ymm5,ymm1,0x13 |
| vperm2i128 ymm1,ymm13,ymm9,0x02 |
| vperm2i128 ymm9,ymm13,ymm9,0x13 |
| vpxor ymm3,ymm3,YMMWORD[((0+256))+rsi] |
| vpxor ymm1,ymm1,YMMWORD[((32+256))+rsi] |
| vpxor ymm5,ymm5,YMMWORD[((64+256))+rsi] |
| vpxor ymm9,ymm9,YMMWORD[((96+256))+rsi] |
| vmovdqu YMMWORD[(0+256)+rdi],ymm3 |
| vmovdqu YMMWORD[(32+256)+rdi],ymm1 |
| vmovdqu YMMWORD[(64+256)+rdi],ymm5 |
| vmovdqu YMMWORD[(96+256)+rdi],ymm9 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| vperm2i128 ymm3,ymm4,ymm0,0x02 |
| vperm2i128 ymm4,ymm4,ymm0,0x13 |
| vperm2i128 ymm0,ymm12,ymm8,0x02 |
| vperm2i128 ymm8,ymm12,ymm8,0x13 |
| vpxor ymm3,ymm3,YMMWORD[((0+384))+rsi] |
| vpxor ymm0,ymm0,YMMWORD[((32+384))+rsi] |
| vpxor ymm4,ymm4,YMMWORD[((64+384))+rsi] |
| vpxor ymm8,ymm8,YMMWORD[((96+384))+rsi] |
| vmovdqu YMMWORD[(0+384)+rdi],ymm3 |
| vmovdqu YMMWORD[(32+384)+rdi],ymm0 |
| vmovdqu YMMWORD[(64+384)+rdi],ymm4 |
| vmovdqu YMMWORD[(96+384)+rdi],ymm8 |
| |
| lea rsi,[512+rsi] |
| lea rdi,[512+rdi] |
| sub rbx,16*32 |
| jmp NEAR $L$open_avx2_main_loop |
| $L$open_avx2_main_loop_done: |
| test rbx,rbx |
| vzeroupper |
| je NEAR $L$open_sse_finalize |
| |
| cmp rbx,12*32 |
| ja NEAR $L$open_avx2_tail_512 |
| cmp rbx,8*32 |
| ja NEAR $L$open_avx2_tail_384 |
| cmp rbx,4*32 |
| ja NEAR $L$open_avx2_tail_256 |
| vmovdqa ymm0,YMMWORD[$L$chacha20_consts] |
| vmovdqa ymm4,YMMWORD[((160+64))+rbp] |
| vmovdqa ymm8,YMMWORD[((160+96))+rbp] |
| vmovdqa ymm12,YMMWORD[$L$avx2_inc] |
| vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] |
| vmovdqa YMMWORD[(160+160)+rbp],ymm12 |
| |
| xor r8,r8 |
| mov rcx,rbx |
| and rcx,-16 |
| test rcx,rcx |
| je NEAR $L$open_avx2_tail_128_rounds |
| $L$open_avx2_tail_128_rounds_and_x1hash: |
| add r10,QWORD[((0+0))+r8*1+rsi] |
| adc r11,QWORD[((8+0))+r8*1+rsi] |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| $L$open_avx2_tail_128_rounds: |
| add r8,16 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm12,ymm12,YMMWORD[$L$rol16] |
| vpaddd ymm8,ymm8,ymm12 |
| vpxor ymm4,ymm4,ymm8 |
| vpsrld ymm3,ymm4,20 |
| vpslld ymm4,ymm4,12 |
| vpxor ymm4,ymm4,ymm3 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm12,ymm12,YMMWORD[$L$rol8] |
| vpaddd ymm8,ymm8,ymm12 |
| vpxor ymm4,ymm4,ymm8 |
| vpslld ymm3,ymm4,7 |
| vpsrld ymm4,ymm4,25 |
| vpxor ymm4,ymm4,ymm3 |
| vpalignr ymm12,ymm12,ymm12,12 |
| vpalignr ymm8,ymm8,ymm8,8 |
| vpalignr ymm4,ymm4,ymm4,4 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm12,ymm12,YMMWORD[$L$rol16] |
| vpaddd ymm8,ymm8,ymm12 |
| vpxor ymm4,ymm4,ymm8 |
| vpsrld ymm3,ymm4,20 |
| vpslld ymm4,ymm4,12 |
| vpxor ymm4,ymm4,ymm3 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm12,ymm12,YMMWORD[$L$rol8] |
| vpaddd ymm8,ymm8,ymm12 |
| vpxor ymm4,ymm4,ymm8 |
| vpslld ymm3,ymm4,7 |
| vpsrld ymm4,ymm4,25 |
| vpxor ymm4,ymm4,ymm3 |
| vpalignr ymm12,ymm12,ymm12,4 |
| vpalignr ymm8,ymm8,ymm8,8 |
| vpalignr ymm4,ymm4,ymm4,12 |
| |
| cmp r8,rcx |
| jb NEAR $L$open_avx2_tail_128_rounds_and_x1hash |
| cmp r8,160 |
| jne NEAR $L$open_avx2_tail_128_rounds |
| vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] |
| vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] |
| vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] |
| vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] |
| vperm2i128 ymm3,ymm4,ymm0,0x13 |
| vperm2i128 ymm0,ymm4,ymm0,0x02 |
| vperm2i128 ymm4,ymm12,ymm8,0x02 |
| vperm2i128 ymm12,ymm12,ymm8,0x13 |
| vmovdqa ymm8,ymm3 |
| |
| jmp NEAR $L$open_avx2_tail_128_xor |
| |
| $L$open_avx2_tail_256: |
| vmovdqa ymm0,YMMWORD[$L$chacha20_consts] |
| vmovdqa ymm4,YMMWORD[((160+64))+rbp] |
| vmovdqa ymm8,YMMWORD[((160+96))+rbp] |
| vmovdqa ymm1,ymm0 |
| vmovdqa ymm5,ymm4 |
| vmovdqa ymm9,ymm8 |
| vmovdqa ymm12,YMMWORD[$L$avx2_inc] |
| vpaddd ymm13,ymm12,YMMWORD[((160+160))+rbp] |
| vpaddd ymm12,ymm12,ymm13 |
| vmovdqa YMMWORD[(160+160)+rbp],ymm12 |
| vmovdqa YMMWORD[(160+192)+rbp],ymm13 |
| |
| mov QWORD[((160+128))+rbp],rbx |
| mov rcx,rbx |
| sub rcx,4*32 |
| shr rcx,4 |
| mov r8,10 |
| cmp rcx,10 |
| cmovg rcx,r8 |
| mov rbx,rsi |
| xor r8,r8 |
| $L$open_avx2_tail_256_rounds_and_x1hash: |
| add r10,QWORD[((0+0))+rbx] |
| adc r11,QWORD[((8+0))+rbx] |
| adc r12,1 |
| mov rdx,QWORD[((0+160+0))+rbp] |
| mov r15,rdx |
| mulx r14,r13,r10 |
| mulx rdx,rax,r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rdx,QWORD[((8+160+0))+rbp] |
| mulx rax,r10,r10 |
| add r14,r10 |
| mulx r9,r11,r11 |
| adc r15,r11 |
| adc r9,0 |
| imul rdx,r12 |
| add r15,rax |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| lea rbx,[16+rbx] |
| $L$open_avx2_tail_256_rounds: |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm12,ymm12,YMMWORD[$L$rol16] |
| vpaddd ymm8,ymm8,ymm12 |
| vpxor ymm4,ymm4,ymm8 |
| vpsrld ymm3,ymm4,20 |
| vpslld ymm4,ymm4,12 |
| vpxor ymm4,ymm4,ymm3 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm12,ymm12,YMMWORD[$L$rol8] |
| vpaddd ymm8,ymm8,ymm12 |
| vpxor ymm4,ymm4,ymm8 |
| vpslld ymm3,ymm4,7 |
| vpsrld ymm4,ymm4,25 |
| vpxor ymm4,ymm4,ymm3 |
| vpalignr ymm12,ymm12,ymm12,12 |
| vpalignr ymm8,ymm8,ymm8,8 |
| vpalignr ymm4,ymm4,ymm4,4 |
| vpaddd ymm1,ymm1,ymm5 |
| vpxor ymm13,ymm13,ymm1 |
| vpshufb ymm13,ymm13,YMMWORD[$L$rol16] |
| vpaddd ymm9,ymm9,ymm13 |
| vpxor ymm5,ymm5,ymm9 |
| vpsrld ymm3,ymm5,20 |
| vpslld ymm5,ymm5,12 |
| vpxor ymm5,ymm5,ymm3 |
| vpaddd ymm1,ymm1,ymm5 |
| vpxor ymm13,ymm13,ymm1 |
| vpshufb ymm13,ymm13,YMMWORD[$L$rol8] |
| vpaddd ymm9,ymm9,ymm13 |
| vpxor ymm5,ymm5,ymm9 |
| vpslld ymm3,ymm5,7 |
| vpsrld ymm5,ymm5,25 |
| vpxor ymm5,ymm5,ymm3 |
| vpalignr ymm13,ymm13,ymm13,12 |
| vpalignr ymm9,ymm9,ymm9,8 |
| vpalignr ymm5,ymm5,ymm5,4 |
| |
| inc r8 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm12,ymm12,YMMWORD[$L$rol16] |
| vpaddd ymm8,ymm8,ymm12 |
| vpxor ymm4,ymm4,ymm8 |
| vpsrld ymm3,ymm4,20 |
| vpslld ymm4,ymm4,12 |
| vpxor ymm4,ymm4,ymm3 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm12,ymm12,YMMWORD[$L$rol8] |
| vpaddd ymm8,ymm8,ymm12 |
| vpxor ymm4,ymm4,ymm8 |
| vpslld ymm3,ymm4,7 |
| vpsrld ymm4,ymm4,25 |
| vpxor ymm4,ymm4,ymm3 |
| vpalignr ymm12,ymm12,ymm12,4 |
| vpalignr ymm8,ymm8,ymm8,8 |
| vpalignr ymm4,ymm4,ymm4,12 |
| vpaddd ymm1,ymm1,ymm5 |
| vpxor ymm13,ymm13,ymm1 |
| vpshufb ymm13,ymm13,YMMWORD[$L$rol16] |
| vpaddd ymm9,ymm9,ymm13 |
| vpxor ymm5,ymm5,ymm9 |
| vpsrld ymm3,ymm5,20 |
| vpslld ymm5,ymm5,12 |
| vpxor ymm5,ymm5,ymm3 |
| vpaddd ymm1,ymm1,ymm5 |
| vpxor ymm13,ymm13,ymm1 |
| vpshufb ymm13,ymm13,YMMWORD[$L$rol8] |
| vpaddd ymm9,ymm9,ymm13 |
| vpxor ymm5,ymm5,ymm9 |
| vpslld ymm3,ymm5,7 |
| vpsrld ymm5,ymm5,25 |
| vpxor ymm5,ymm5,ymm3 |
| vpalignr ymm13,ymm13,ymm13,4 |
| vpalignr ymm9,ymm9,ymm9,8 |
| vpalignr ymm5,ymm5,ymm5,12 |
| vpaddd ymm2,ymm2,ymm6 |
| vpxor ymm14,ymm14,ymm2 |
| vpshufb ymm14,ymm14,YMMWORD[$L$rol16] |
| vpaddd ymm10,ymm10,ymm14 |
| vpxor ymm6,ymm6,ymm10 |
| vpsrld ymm3,ymm6,20 |
| vpslld ymm6,ymm6,12 |
| vpxor ymm6,ymm6,ymm3 |
| vpaddd ymm2,ymm2,ymm6 |
| vpxor ymm14,ymm14,ymm2 |
| vpshufb ymm14,ymm14,YMMWORD[$L$rol8] |
| vpaddd ymm10,ymm10,ymm14 |
| vpxor ymm6,ymm6,ymm10 |
| vpslld ymm3,ymm6,7 |
| vpsrld ymm6,ymm6,25 |
| vpxor ymm6,ymm6,ymm3 |
| vpalignr ymm14,ymm14,ymm14,4 |
| vpalignr ymm10,ymm10,ymm10,8 |
| vpalignr ymm6,ymm6,ymm6,12 |
| |
| cmp r8,rcx |
| jb NEAR $L$open_avx2_tail_256_rounds_and_x1hash |
| cmp r8,10 |
| jne NEAR $L$open_avx2_tail_256_rounds |
| mov r8,rbx |
| sub rbx,rsi |
| mov rcx,rbx |
| mov rbx,QWORD[((160+128))+rbp] |
| $L$open_avx2_tail_256_hash: |
| add rcx,16 |
| cmp rcx,rbx |
| jg NEAR $L$open_avx2_tail_256_done |
| add r10,QWORD[((0+0))+r8] |
| adc r11,QWORD[((8+0))+r8] |
| adc r12,1 |
| mov rdx,QWORD[((0+160+0))+rbp] |
| mov r15,rdx |
| mulx r14,r13,r10 |
| mulx rdx,rax,r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rdx,QWORD[((8+160+0))+rbp] |
| mulx rax,r10,r10 |
| add r14,r10 |
| mulx r9,r11,r11 |
| adc r15,r11 |
| adc r9,0 |
| imul rdx,r12 |
| add r15,rax |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| lea r8,[16+r8] |
| jmp NEAR $L$open_avx2_tail_256_hash |
| $L$open_avx2_tail_256_done: |
| vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] |
| vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] |
| vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] |
| vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] |
| vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] |
| vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] |
| vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] |
| vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] |
| vperm2i128 ymm3,ymm5,ymm1,0x02 |
| vperm2i128 ymm5,ymm5,ymm1,0x13 |
| vperm2i128 ymm1,ymm13,ymm9,0x02 |
| vperm2i128 ymm9,ymm13,ymm9,0x13 |
| vpxor ymm3,ymm3,YMMWORD[((0+0))+rsi] |
| vpxor ymm1,ymm1,YMMWORD[((32+0))+rsi] |
| vpxor ymm5,ymm5,YMMWORD[((64+0))+rsi] |
| vpxor ymm9,ymm9,YMMWORD[((96+0))+rsi] |
| vmovdqu YMMWORD[(0+0)+rdi],ymm3 |
| vmovdqu YMMWORD[(32+0)+rdi],ymm1 |
| vmovdqu YMMWORD[(64+0)+rdi],ymm5 |
| vmovdqu YMMWORD[(96+0)+rdi],ymm9 |
| vperm2i128 ymm3,ymm4,ymm0,0x13 |
| vperm2i128 ymm0,ymm4,ymm0,0x02 |
| vperm2i128 ymm4,ymm12,ymm8,0x02 |
| vperm2i128 ymm12,ymm12,ymm8,0x13 |
| vmovdqa ymm8,ymm3 |
| |
| lea rsi,[128+rsi] |
| lea rdi,[128+rdi] |
| sub rbx,4*32 |
| jmp NEAR $L$open_avx2_tail_128_xor |
| |
| $L$open_avx2_tail_384: |
| vmovdqa ymm0,YMMWORD[$L$chacha20_consts] |
| vmovdqa ymm4,YMMWORD[((160+64))+rbp] |
| vmovdqa ymm8,YMMWORD[((160+96))+rbp] |
| vmovdqa ymm1,ymm0 |
| vmovdqa ymm5,ymm4 |
| vmovdqa ymm9,ymm8 |
| vmovdqa ymm2,ymm0 |
| vmovdqa ymm6,ymm4 |
| vmovdqa ymm10,ymm8 |
| vmovdqa ymm12,YMMWORD[$L$avx2_inc] |
| vpaddd ymm14,ymm12,YMMWORD[((160+160))+rbp] |
| vpaddd ymm13,ymm12,ymm14 |
| vpaddd ymm12,ymm12,ymm13 |
| vmovdqa YMMWORD[(160+160)+rbp],ymm12 |
| vmovdqa YMMWORD[(160+192)+rbp],ymm13 |
| vmovdqa YMMWORD[(160+224)+rbp],ymm14 |
| |
| mov QWORD[((160+128))+rbp],rbx |
| mov rcx,rbx |
| sub rcx,8*32 |
| shr rcx,4 |
| add rcx,6 |
| mov r8,10 |
| cmp rcx,10 |
| cmovg rcx,r8 |
| mov rbx,rsi |
| xor r8,r8 |
| $L$open_avx2_tail_384_rounds_and_x2hash: |
| add r10,QWORD[((0+0))+rbx] |
| adc r11,QWORD[((8+0))+rbx] |
| adc r12,1 |
| mov rdx,QWORD[((0+160+0))+rbp] |
| mov r15,rdx |
| mulx r14,r13,r10 |
| mulx rdx,rax,r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rdx,QWORD[((8+160+0))+rbp] |
| mulx rax,r10,r10 |
| add r14,r10 |
| mulx r9,r11,r11 |
| adc r15,r11 |
| adc r9,0 |
| imul rdx,r12 |
| add r15,rax |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| lea rbx,[16+rbx] |
| $L$open_avx2_tail_384_rounds_and_x1hash: |
| vpaddd ymm2,ymm2,ymm6 |
| vpxor ymm14,ymm14,ymm2 |
| vpshufb ymm14,ymm14,YMMWORD[$L$rol16] |
| vpaddd ymm10,ymm10,ymm14 |
| vpxor ymm6,ymm6,ymm10 |
| vpsrld ymm3,ymm6,20 |
| vpslld ymm6,ymm6,12 |
| vpxor ymm6,ymm6,ymm3 |
| vpaddd ymm2,ymm2,ymm6 |
| vpxor ymm14,ymm14,ymm2 |
| vpshufb ymm14,ymm14,YMMWORD[$L$rol8] |
| vpaddd ymm10,ymm10,ymm14 |
| vpxor ymm6,ymm6,ymm10 |
| vpslld ymm3,ymm6,7 |
| vpsrld ymm6,ymm6,25 |
| vpxor ymm6,ymm6,ymm3 |
| vpalignr ymm14,ymm14,ymm14,12 |
| vpalignr ymm10,ymm10,ymm10,8 |
| vpalignr ymm6,ymm6,ymm6,4 |
| vpaddd ymm1,ymm1,ymm5 |
| vpxor ymm13,ymm13,ymm1 |
| vpshufb ymm13,ymm13,YMMWORD[$L$rol16] |
| vpaddd ymm9,ymm9,ymm13 |
| vpxor ymm5,ymm5,ymm9 |
| vpsrld ymm3,ymm5,20 |
| vpslld ymm5,ymm5,12 |
| vpxor ymm5,ymm5,ymm3 |
| vpaddd ymm1,ymm1,ymm5 |
| vpxor ymm13,ymm13,ymm1 |
| vpshufb ymm13,ymm13,YMMWORD[$L$rol8] |
| vpaddd ymm9,ymm9,ymm13 |
| vpxor ymm5,ymm5,ymm9 |
| vpslld ymm3,ymm5,7 |
| vpsrld ymm5,ymm5,25 |
| vpxor ymm5,ymm5,ymm3 |
| vpalignr ymm13,ymm13,ymm13,12 |
| vpalignr ymm9,ymm9,ymm9,8 |
| vpalignr ymm5,ymm5,ymm5,4 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm12,ymm12,YMMWORD[$L$rol16] |
| vpaddd ymm8,ymm8,ymm12 |
| vpxor ymm4,ymm4,ymm8 |
| vpsrld ymm3,ymm4,20 |
| vpslld ymm4,ymm4,12 |
| vpxor ymm4,ymm4,ymm3 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm12,ymm12,YMMWORD[$L$rol8] |
| vpaddd ymm8,ymm8,ymm12 |
| vpxor ymm4,ymm4,ymm8 |
| vpslld ymm3,ymm4,7 |
| vpsrld ymm4,ymm4,25 |
| vpxor ymm4,ymm4,ymm3 |
| vpalignr ymm12,ymm12,ymm12,12 |
| vpalignr ymm8,ymm8,ymm8,8 |
| vpalignr ymm4,ymm4,ymm4,4 |
| add r10,QWORD[((0+0))+rbx] |
| adc r11,QWORD[((8+0))+rbx] |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| lea rbx,[16+rbx] |
| inc r8 |
| vpaddd ymm2,ymm2,ymm6 |
| vpxor ymm14,ymm14,ymm2 |
| vpshufb ymm14,ymm14,YMMWORD[$L$rol16] |
| vpaddd ymm10,ymm10,ymm14 |
| vpxor ymm6,ymm6,ymm10 |
| vpsrld ymm3,ymm6,20 |
| vpslld ymm6,ymm6,12 |
| vpxor ymm6,ymm6,ymm3 |
| vpaddd ymm2,ymm2,ymm6 |
| vpxor ymm14,ymm14,ymm2 |
| vpshufb ymm14,ymm14,YMMWORD[$L$rol8] |
| vpaddd ymm10,ymm10,ymm14 |
| vpxor ymm6,ymm6,ymm10 |
| vpslld ymm3,ymm6,7 |
| vpsrld ymm6,ymm6,25 |
| vpxor ymm6,ymm6,ymm3 |
| vpalignr ymm14,ymm14,ymm14,4 |
| vpalignr ymm10,ymm10,ymm10,8 |
| vpalignr ymm6,ymm6,ymm6,12 |
| vpaddd ymm1,ymm1,ymm5 |
| vpxor ymm13,ymm13,ymm1 |
| vpshufb ymm13,ymm13,YMMWORD[$L$rol16] |
| vpaddd ymm9,ymm9,ymm13 |
| vpxor ymm5,ymm5,ymm9 |
| vpsrld ymm3,ymm5,20 |
| vpslld ymm5,ymm5,12 |
| vpxor ymm5,ymm5,ymm3 |
| vpaddd ymm1,ymm1,ymm5 |
| vpxor ymm13,ymm13,ymm1 |
| vpshufb ymm13,ymm13,YMMWORD[$L$rol8] |
| vpaddd ymm9,ymm9,ymm13 |
| vpxor ymm5,ymm5,ymm9 |
| vpslld ymm3,ymm5,7 |
| vpsrld ymm5,ymm5,25 |
| vpxor ymm5,ymm5,ymm3 |
| vpalignr ymm13,ymm13,ymm13,4 |
| vpalignr ymm9,ymm9,ymm9,8 |
| vpalignr ymm5,ymm5,ymm5,12 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm12,ymm12,YMMWORD[$L$rol16] |
| vpaddd ymm8,ymm8,ymm12 |
| vpxor ymm4,ymm4,ymm8 |
| vpsrld ymm3,ymm4,20 |
| vpslld ymm4,ymm4,12 |
| vpxor ymm4,ymm4,ymm3 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm12,ymm12,YMMWORD[$L$rol8] |
| vpaddd ymm8,ymm8,ymm12 |
| vpxor ymm4,ymm4,ymm8 |
| vpslld ymm3,ymm4,7 |
| vpsrld ymm4,ymm4,25 |
| vpxor ymm4,ymm4,ymm3 |
| vpalignr ymm12,ymm12,ymm12,4 |
| vpalignr ymm8,ymm8,ymm8,8 |
| vpalignr ymm4,ymm4,ymm4,12 |
| |
| cmp r8,rcx |
| jb NEAR $L$open_avx2_tail_384_rounds_and_x2hash |
| cmp r8,10 |
| jne NEAR $L$open_avx2_tail_384_rounds_and_x1hash |
| mov r8,rbx |
| sub rbx,rsi |
| mov rcx,rbx |
| mov rbx,QWORD[((160+128))+rbp] |
| $L$open_avx2_384_tail_hash: |
| add rcx,16 |
| cmp rcx,rbx |
| jg NEAR $L$open_avx2_384_tail_done |
| add r10,QWORD[((0+0))+r8] |
| adc r11,QWORD[((8+0))+r8] |
| adc r12,1 |
| mov rdx,QWORD[((0+160+0))+rbp] |
| mov r15,rdx |
| mulx r14,r13,r10 |
| mulx rdx,rax,r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rdx,QWORD[((8+160+0))+rbp] |
| mulx rax,r10,r10 |
| add r14,r10 |
| mulx r9,r11,r11 |
| adc r15,r11 |
| adc r9,0 |
| imul rdx,r12 |
| add r15,rax |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| lea r8,[16+r8] |
| jmp NEAR $L$open_avx2_384_tail_hash |
| $L$open_avx2_384_tail_done: |
| vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] |
| vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp] |
| vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp] |
| vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] |
| vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] |
| vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] |
| vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] |
| vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] |
| vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] |
| vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] |
| vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] |
| vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] |
| vperm2i128 ymm3,ymm6,ymm2,0x02 |
| vperm2i128 ymm6,ymm6,ymm2,0x13 |
| vperm2i128 ymm2,ymm14,ymm10,0x02 |
| vperm2i128 ymm10,ymm14,ymm10,0x13 |
| vpxor ymm3,ymm3,YMMWORD[((0+0))+rsi] |
| vpxor ymm2,ymm2,YMMWORD[((32+0))+rsi] |
| vpxor ymm6,ymm6,YMMWORD[((64+0))+rsi] |
| vpxor ymm10,ymm10,YMMWORD[((96+0))+rsi] |
| vmovdqu YMMWORD[(0+0)+rdi],ymm3 |
| vmovdqu YMMWORD[(32+0)+rdi],ymm2 |
| vmovdqu YMMWORD[(64+0)+rdi],ymm6 |
| vmovdqu YMMWORD[(96+0)+rdi],ymm10 |
| vperm2i128 ymm3,ymm5,ymm1,0x02 |
| vperm2i128 ymm5,ymm5,ymm1,0x13 |
| vperm2i128 ymm1,ymm13,ymm9,0x02 |
| vperm2i128 ymm9,ymm13,ymm9,0x13 |
| vpxor ymm3,ymm3,YMMWORD[((0+128))+rsi] |
| vpxor ymm1,ymm1,YMMWORD[((32+128))+rsi] |
| vpxor ymm5,ymm5,YMMWORD[((64+128))+rsi] |
| vpxor ymm9,ymm9,YMMWORD[((96+128))+rsi] |
| vmovdqu YMMWORD[(0+128)+rdi],ymm3 |
| vmovdqu YMMWORD[(32+128)+rdi],ymm1 |
| vmovdqu YMMWORD[(64+128)+rdi],ymm5 |
| vmovdqu YMMWORD[(96+128)+rdi],ymm9 |
| vperm2i128 ymm3,ymm4,ymm0,0x13 |
| vperm2i128 ymm0,ymm4,ymm0,0x02 |
| vperm2i128 ymm4,ymm12,ymm8,0x02 |
| vperm2i128 ymm12,ymm12,ymm8,0x13 |
| vmovdqa ymm8,ymm3 |
| |
| lea rsi,[256+rsi] |
| lea rdi,[256+rdi] |
| sub rbx,8*32 |
| jmp NEAR $L$open_avx2_tail_128_xor |
| |
| $L$open_avx2_tail_512: |
| vmovdqa ymm0,YMMWORD[$L$chacha20_consts] |
| vmovdqa ymm4,YMMWORD[((160+64))+rbp] |
| vmovdqa ymm8,YMMWORD[((160+96))+rbp] |
| vmovdqa ymm1,ymm0 |
| vmovdqa ymm5,ymm4 |
| vmovdqa ymm9,ymm8 |
| vmovdqa ymm2,ymm0 |
| vmovdqa ymm6,ymm4 |
| vmovdqa ymm10,ymm8 |
| vmovdqa ymm3,ymm0 |
| vmovdqa ymm7,ymm4 |
| vmovdqa ymm11,ymm8 |
| vmovdqa ymm12,YMMWORD[$L$avx2_inc] |
| vpaddd ymm15,ymm12,YMMWORD[((160+160))+rbp] |
| vpaddd ymm14,ymm12,ymm15 |
| vpaddd ymm13,ymm12,ymm14 |
| vpaddd ymm12,ymm12,ymm13 |
| vmovdqa YMMWORD[(160+256)+rbp],ymm15 |
| vmovdqa YMMWORD[(160+224)+rbp],ymm14 |
| vmovdqa YMMWORD[(160+192)+rbp],ymm13 |
| vmovdqa YMMWORD[(160+160)+rbp],ymm12 |
| |
| xor rcx,rcx |
| mov r8,rsi |
| $L$open_avx2_tail_512_rounds_and_x2hash: |
| add r10,QWORD[((0+0))+r8] |
| adc r11,QWORD[((8+0))+r8] |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| lea r8,[16+r8] |
| $L$open_avx2_tail_512_rounds_and_x1hash: |
| vmovdqa YMMWORD[(160+128)+rbp],ymm8 |
| vmovdqa ymm8,YMMWORD[$L$rol16] |
| vpaddd ymm3,ymm3,ymm7 |
| vpaddd ymm2,ymm2,ymm6 |
| vpaddd ymm1,ymm1,ymm5 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm15,ymm15,ymm3 |
| vpxor ymm14,ymm14,ymm2 |
| vpxor ymm13,ymm13,ymm1 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm15,ymm15,ymm8 |
| vpshufb ymm14,ymm14,ymm8 |
| vpshufb ymm13,ymm13,ymm8 |
| vpshufb ymm12,ymm12,ymm8 |
| vpaddd ymm11,ymm11,ymm15 |
| vpaddd ymm10,ymm10,ymm14 |
| vpaddd ymm9,ymm9,ymm13 |
| vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] |
| vpxor ymm7,ymm7,ymm11 |
| vpxor ymm6,ymm6,ymm10 |
| vpxor ymm5,ymm5,ymm9 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa YMMWORD[(160+128)+rbp],ymm8 |
| vpsrld ymm8,ymm7,20 |
| vpslld ymm7,ymm7,32-20 |
| vpxor ymm7,ymm7,ymm8 |
| vpsrld ymm8,ymm6,20 |
| vpslld ymm6,ymm6,32-20 |
| vpxor ymm6,ymm6,ymm8 |
| vpsrld ymm8,ymm5,20 |
| vpslld ymm5,ymm5,32-20 |
| vpxor ymm5,ymm5,ymm8 |
| vpsrld ymm8,ymm4,20 |
| vpslld ymm4,ymm4,32-20 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa ymm8,YMMWORD[$L$rol8] |
| vpaddd ymm3,ymm3,ymm7 |
| add r10,QWORD[((0+0))+r8] |
| adc r11,QWORD[((8+0))+r8] |
| adc r12,1 |
| mov rdx,QWORD[((0+160+0))+rbp] |
| mov r15,rdx |
| mulx r14,r13,r10 |
| mulx rdx,rax,r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rdx,QWORD[((8+160+0))+rbp] |
| mulx rax,r10,r10 |
| add r14,r10 |
| mulx r9,r11,r11 |
| adc r15,r11 |
| adc r9,0 |
| imul rdx,r12 |
| add r15,rax |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| vpaddd ymm2,ymm2,ymm6 |
| vpaddd ymm1,ymm1,ymm5 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm15,ymm15,ymm3 |
| vpxor ymm14,ymm14,ymm2 |
| vpxor ymm13,ymm13,ymm1 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm15,ymm15,ymm8 |
| vpshufb ymm14,ymm14,ymm8 |
| vpshufb ymm13,ymm13,ymm8 |
| vpshufb ymm12,ymm12,ymm8 |
| vpaddd ymm11,ymm11,ymm15 |
| vpaddd ymm10,ymm10,ymm14 |
| vpaddd ymm9,ymm9,ymm13 |
| vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] |
| vpxor ymm7,ymm7,ymm11 |
| vpxor ymm6,ymm6,ymm10 |
| vpxor ymm5,ymm5,ymm9 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa YMMWORD[(160+128)+rbp],ymm8 |
| vpsrld ymm8,ymm7,25 |
| vpslld ymm7,ymm7,32-25 |
| vpxor ymm7,ymm7,ymm8 |
| vpsrld ymm8,ymm6,25 |
| vpslld ymm6,ymm6,32-25 |
| vpxor ymm6,ymm6,ymm8 |
| vpsrld ymm8,ymm5,25 |
| vpslld ymm5,ymm5,32-25 |
| vpxor ymm5,ymm5,ymm8 |
| vpsrld ymm8,ymm4,25 |
| vpslld ymm4,ymm4,32-25 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa ymm8,YMMWORD[((160+128))+rbp] |
| vpalignr ymm7,ymm7,ymm7,4 |
| vpalignr ymm11,ymm11,ymm11,8 |
| vpalignr ymm15,ymm15,ymm15,12 |
| vpalignr ymm6,ymm6,ymm6,4 |
| vpalignr ymm10,ymm10,ymm10,8 |
| vpalignr ymm14,ymm14,ymm14,12 |
| vpalignr ymm5,ymm5,ymm5,4 |
| vpalignr ymm9,ymm9,ymm9,8 |
| vpalignr ymm13,ymm13,ymm13,12 |
| vpalignr ymm4,ymm4,ymm4,4 |
| vpalignr ymm8,ymm8,ymm8,8 |
| vpalignr ymm12,ymm12,ymm12,12 |
| vmovdqa YMMWORD[(160+128)+rbp],ymm8 |
| vmovdqa ymm8,YMMWORD[$L$rol16] |
| vpaddd ymm3,ymm3,ymm7 |
| add r10,QWORD[((0+16))+r8] |
| adc r11,QWORD[((8+16))+r8] |
| adc r12,1 |
| mov rdx,QWORD[((0+160+0))+rbp] |
| mov r15,rdx |
| mulx r14,r13,r10 |
| mulx rdx,rax,r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rdx,QWORD[((8+160+0))+rbp] |
| mulx rax,r10,r10 |
| add r14,r10 |
| mulx r9,r11,r11 |
| adc r15,r11 |
| adc r9,0 |
| imul rdx,r12 |
| add r15,rax |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| lea r8,[32+r8] |
| vpaddd ymm2,ymm2,ymm6 |
| vpaddd ymm1,ymm1,ymm5 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm15,ymm15,ymm3 |
| vpxor ymm14,ymm14,ymm2 |
| vpxor ymm13,ymm13,ymm1 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm15,ymm15,ymm8 |
| vpshufb ymm14,ymm14,ymm8 |
| vpshufb ymm13,ymm13,ymm8 |
| vpshufb ymm12,ymm12,ymm8 |
| vpaddd ymm11,ymm11,ymm15 |
| vpaddd ymm10,ymm10,ymm14 |
| vpaddd ymm9,ymm9,ymm13 |
| vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] |
| vpxor ymm7,ymm7,ymm11 |
| vpxor ymm6,ymm6,ymm10 |
| vpxor ymm5,ymm5,ymm9 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa YMMWORD[(160+128)+rbp],ymm8 |
| vpsrld ymm8,ymm7,20 |
| vpslld ymm7,ymm7,32-20 |
| vpxor ymm7,ymm7,ymm8 |
| vpsrld ymm8,ymm6,20 |
| vpslld ymm6,ymm6,32-20 |
| vpxor ymm6,ymm6,ymm8 |
| vpsrld ymm8,ymm5,20 |
| vpslld ymm5,ymm5,32-20 |
| vpxor ymm5,ymm5,ymm8 |
| vpsrld ymm8,ymm4,20 |
| vpslld ymm4,ymm4,32-20 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa ymm8,YMMWORD[$L$rol8] |
| vpaddd ymm3,ymm3,ymm7 |
| vpaddd ymm2,ymm2,ymm6 |
| vpaddd ymm1,ymm1,ymm5 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm15,ymm15,ymm3 |
| vpxor ymm14,ymm14,ymm2 |
| vpxor ymm13,ymm13,ymm1 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm15,ymm15,ymm8 |
| vpshufb ymm14,ymm14,ymm8 |
| vpshufb ymm13,ymm13,ymm8 |
| vpshufb ymm12,ymm12,ymm8 |
| vpaddd ymm11,ymm11,ymm15 |
| vpaddd ymm10,ymm10,ymm14 |
| vpaddd ymm9,ymm9,ymm13 |
| vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] |
| vpxor ymm7,ymm7,ymm11 |
| vpxor ymm6,ymm6,ymm10 |
| vpxor ymm5,ymm5,ymm9 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa YMMWORD[(160+128)+rbp],ymm8 |
| vpsrld ymm8,ymm7,25 |
| vpslld ymm7,ymm7,32-25 |
| vpxor ymm7,ymm7,ymm8 |
| vpsrld ymm8,ymm6,25 |
| vpslld ymm6,ymm6,32-25 |
| vpxor ymm6,ymm6,ymm8 |
| vpsrld ymm8,ymm5,25 |
| vpslld ymm5,ymm5,32-25 |
| vpxor ymm5,ymm5,ymm8 |
| vpsrld ymm8,ymm4,25 |
| vpslld ymm4,ymm4,32-25 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa ymm8,YMMWORD[((160+128))+rbp] |
| vpalignr ymm7,ymm7,ymm7,12 |
| vpalignr ymm11,ymm11,ymm11,8 |
| vpalignr ymm15,ymm15,ymm15,4 |
| vpalignr ymm6,ymm6,ymm6,12 |
| vpalignr ymm10,ymm10,ymm10,8 |
| vpalignr ymm14,ymm14,ymm14,4 |
| vpalignr ymm5,ymm5,ymm5,12 |
| vpalignr ymm9,ymm9,ymm9,8 |
| vpalignr ymm13,ymm13,ymm13,4 |
| vpalignr ymm4,ymm4,ymm4,12 |
| vpalignr ymm8,ymm8,ymm8,8 |
| vpalignr ymm12,ymm12,ymm12,4 |
| |
| inc rcx |
| cmp rcx,4 |
| jl NEAR $L$open_avx2_tail_512_rounds_and_x2hash |
| cmp rcx,10 |
| jne NEAR $L$open_avx2_tail_512_rounds_and_x1hash |
| mov rcx,rbx |
| sub rcx,12*32 |
| and rcx,-16 |
| $L$open_avx2_tail_512_hash: |
| test rcx,rcx |
| je NEAR $L$open_avx2_tail_512_done |
| add r10,QWORD[((0+0))+r8] |
| adc r11,QWORD[((8+0))+r8] |
| adc r12,1 |
| mov rdx,QWORD[((0+160+0))+rbp] |
| mov r15,rdx |
| mulx r14,r13,r10 |
| mulx rdx,rax,r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rdx,QWORD[((8+160+0))+rbp] |
| mulx rax,r10,r10 |
| add r14,r10 |
| mulx r9,r11,r11 |
| adc r15,r11 |
| adc r9,0 |
| imul rdx,r12 |
| add r15,rax |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| lea r8,[16+r8] |
| sub rcx,2*8 |
| jmp NEAR $L$open_avx2_tail_512_hash |
| $L$open_avx2_tail_512_done: |
| vpaddd ymm3,ymm3,YMMWORD[$L$chacha20_consts] |
| vpaddd ymm7,ymm7,YMMWORD[((160+64))+rbp] |
| vpaddd ymm11,ymm11,YMMWORD[((160+96))+rbp] |
| vpaddd ymm15,ymm15,YMMWORD[((160+256))+rbp] |
| vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] |
| vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp] |
| vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp] |
| vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] |
| vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] |
| vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] |
| vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] |
| vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] |
| vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] |
| vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] |
| vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] |
| vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] |
| |
| vmovdqa YMMWORD[(160+128)+rbp],ymm0 |
| vperm2i128 ymm0,ymm7,ymm3,0x02 |
| vperm2i128 ymm7,ymm7,ymm3,0x13 |
| vperm2i128 ymm3,ymm15,ymm11,0x02 |
| vperm2i128 ymm11,ymm15,ymm11,0x13 |
| vpxor ymm0,ymm0,YMMWORD[((0+0))+rsi] |
| vpxor ymm3,ymm3,YMMWORD[((32+0))+rsi] |
| vpxor ymm7,ymm7,YMMWORD[((64+0))+rsi] |
| vpxor ymm11,ymm11,YMMWORD[((96+0))+rsi] |
| vmovdqu YMMWORD[(0+0)+rdi],ymm0 |
| vmovdqu YMMWORD[(32+0)+rdi],ymm3 |
| vmovdqu YMMWORD[(64+0)+rdi],ymm7 |
| vmovdqu YMMWORD[(96+0)+rdi],ymm11 |
| |
| vmovdqa ymm0,YMMWORD[((160+128))+rbp] |
| vperm2i128 ymm3,ymm6,ymm2,0x02 |
| vperm2i128 ymm6,ymm6,ymm2,0x13 |
| vperm2i128 ymm2,ymm14,ymm10,0x02 |
| vperm2i128 ymm10,ymm14,ymm10,0x13 |
| vpxor ymm3,ymm3,YMMWORD[((0+128))+rsi] |
| vpxor ymm2,ymm2,YMMWORD[((32+128))+rsi] |
| vpxor ymm6,ymm6,YMMWORD[((64+128))+rsi] |
| vpxor ymm10,ymm10,YMMWORD[((96+128))+rsi] |
| vmovdqu YMMWORD[(0+128)+rdi],ymm3 |
| vmovdqu YMMWORD[(32+128)+rdi],ymm2 |
| vmovdqu YMMWORD[(64+128)+rdi],ymm6 |
| vmovdqu YMMWORD[(96+128)+rdi],ymm10 |
| vperm2i128 ymm3,ymm5,ymm1,0x02 |
| vperm2i128 ymm5,ymm5,ymm1,0x13 |
| vperm2i128 ymm1,ymm13,ymm9,0x02 |
| vperm2i128 ymm9,ymm13,ymm9,0x13 |
| vpxor ymm3,ymm3,YMMWORD[((0+256))+rsi] |
| vpxor ymm1,ymm1,YMMWORD[((32+256))+rsi] |
| vpxor ymm5,ymm5,YMMWORD[((64+256))+rsi] |
| vpxor ymm9,ymm9,YMMWORD[((96+256))+rsi] |
| vmovdqu YMMWORD[(0+256)+rdi],ymm3 |
| vmovdqu YMMWORD[(32+256)+rdi],ymm1 |
| vmovdqu YMMWORD[(64+256)+rdi],ymm5 |
| vmovdqu YMMWORD[(96+256)+rdi],ymm9 |
| vperm2i128 ymm3,ymm4,ymm0,0x13 |
| vperm2i128 ymm0,ymm4,ymm0,0x02 |
| vperm2i128 ymm4,ymm12,ymm8,0x02 |
| vperm2i128 ymm12,ymm12,ymm8,0x13 |
| vmovdqa ymm8,ymm3 |
| |
| lea rsi,[384+rsi] |
| lea rdi,[384+rdi] |
| sub rbx,12*32 |
| $L$open_avx2_tail_128_xor: |
| cmp rbx,32 |
| jb NEAR $L$open_avx2_tail_32_xor |
| sub rbx,32 |
| vpxor ymm0,ymm0,YMMWORD[rsi] |
| vmovdqu YMMWORD[rdi],ymm0 |
| lea rsi,[32+rsi] |
| lea rdi,[32+rdi] |
| vmovdqa ymm0,ymm4 |
| vmovdqa ymm4,ymm8 |
| vmovdqa ymm8,ymm12 |
| jmp NEAR $L$open_avx2_tail_128_xor |
| $L$open_avx2_tail_32_xor: |
| cmp rbx,16 |
| vmovdqa xmm1,xmm0 |
| jb NEAR $L$open_avx2_exit |
| sub rbx,16 |
| |
| vpxor xmm1,xmm0,XMMWORD[rsi] |
| vmovdqu XMMWORD[rdi],xmm1 |
| lea rsi,[16+rsi] |
| lea rdi,[16+rdi] |
| vperm2i128 ymm0,ymm0,ymm0,0x11 |
| vmovdqa xmm1,xmm0 |
| $L$open_avx2_exit: |
| vzeroupper |
| jmp NEAR $L$open_sse_tail_16 |
| |
| $L$open_avx2_192: |
| vmovdqa ymm1,ymm0 |
| vmovdqa ymm2,ymm0 |
| vmovdqa ymm5,ymm4 |
| vmovdqa ymm6,ymm4 |
| vmovdqa ymm9,ymm8 |
| vmovdqa ymm10,ymm8 |
| vpaddd ymm13,ymm12,YMMWORD[$L$avx2_inc] |
| vmovdqa ymm11,ymm12 |
| vmovdqa ymm15,ymm13 |
| mov r10,10 |
| $L$open_avx2_192_rounds: |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm12,ymm12,YMMWORD[$L$rol16] |
| vpaddd ymm8,ymm8,ymm12 |
| vpxor ymm4,ymm4,ymm8 |
| vpsrld ymm3,ymm4,20 |
| vpslld ymm4,ymm4,12 |
| vpxor ymm4,ymm4,ymm3 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm12,ymm12,YMMWORD[$L$rol8] |
| vpaddd ymm8,ymm8,ymm12 |
| vpxor ymm4,ymm4,ymm8 |
| vpslld ymm3,ymm4,7 |
| vpsrld ymm4,ymm4,25 |
| vpxor ymm4,ymm4,ymm3 |
| vpalignr ymm12,ymm12,ymm12,12 |
| vpalignr ymm8,ymm8,ymm8,8 |
| vpalignr ymm4,ymm4,ymm4,4 |
| vpaddd ymm1,ymm1,ymm5 |
| vpxor ymm13,ymm13,ymm1 |
| vpshufb ymm13,ymm13,YMMWORD[$L$rol16] |
| vpaddd ymm9,ymm9,ymm13 |
| vpxor ymm5,ymm5,ymm9 |
| vpsrld ymm3,ymm5,20 |
| vpslld ymm5,ymm5,12 |
| vpxor ymm5,ymm5,ymm3 |
| vpaddd ymm1,ymm1,ymm5 |
| vpxor ymm13,ymm13,ymm1 |
| vpshufb ymm13,ymm13,YMMWORD[$L$rol8] |
| vpaddd ymm9,ymm9,ymm13 |
| vpxor ymm5,ymm5,ymm9 |
| vpslld ymm3,ymm5,7 |
| vpsrld ymm5,ymm5,25 |
| vpxor ymm5,ymm5,ymm3 |
| vpalignr ymm13,ymm13,ymm13,12 |
| vpalignr ymm9,ymm9,ymm9,8 |
| vpalignr ymm5,ymm5,ymm5,4 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm12,ymm12,YMMWORD[$L$rol16] |
| vpaddd ymm8,ymm8,ymm12 |
| vpxor ymm4,ymm4,ymm8 |
| vpsrld ymm3,ymm4,20 |
| vpslld ymm4,ymm4,12 |
| vpxor ymm4,ymm4,ymm3 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm12,ymm12,YMMWORD[$L$rol8] |
| vpaddd ymm8,ymm8,ymm12 |
| vpxor ymm4,ymm4,ymm8 |
| vpslld ymm3,ymm4,7 |
| vpsrld ymm4,ymm4,25 |
| vpxor ymm4,ymm4,ymm3 |
| vpalignr ymm12,ymm12,ymm12,4 |
| vpalignr ymm8,ymm8,ymm8,8 |
| vpalignr ymm4,ymm4,ymm4,12 |
| vpaddd ymm1,ymm1,ymm5 |
| vpxor ymm13,ymm13,ymm1 |
| vpshufb ymm13,ymm13,YMMWORD[$L$rol16] |
| vpaddd ymm9,ymm9,ymm13 |
| vpxor ymm5,ymm5,ymm9 |
| vpsrld ymm3,ymm5,20 |
| vpslld ymm5,ymm5,12 |
| vpxor ymm5,ymm5,ymm3 |
| vpaddd ymm1,ymm1,ymm5 |
| vpxor ymm13,ymm13,ymm1 |
| vpshufb ymm13,ymm13,YMMWORD[$L$rol8] |
| vpaddd ymm9,ymm9,ymm13 |
| vpxor ymm5,ymm5,ymm9 |
| vpslld ymm3,ymm5,7 |
| vpsrld ymm5,ymm5,25 |
| vpxor ymm5,ymm5,ymm3 |
| vpalignr ymm13,ymm13,ymm13,4 |
| vpalignr ymm9,ymm9,ymm9,8 |
| vpalignr ymm5,ymm5,ymm5,12 |
| |
| dec r10 |
| jne NEAR $L$open_avx2_192_rounds |
| vpaddd ymm0,ymm0,ymm2 |
| vpaddd ymm1,ymm1,ymm2 |
| vpaddd ymm4,ymm4,ymm6 |
| vpaddd ymm5,ymm5,ymm6 |
| vpaddd ymm8,ymm8,ymm10 |
| vpaddd ymm9,ymm9,ymm10 |
| vpaddd ymm12,ymm12,ymm11 |
| vpaddd ymm13,ymm13,ymm15 |
| vperm2i128 ymm3,ymm4,ymm0,0x02 |
| |
| vpand ymm3,ymm3,YMMWORD[$L$clamp] |
| vmovdqa YMMWORD[(160+0)+rbp],ymm3 |
| |
| vperm2i128 ymm0,ymm4,ymm0,0x13 |
| vperm2i128 ymm4,ymm12,ymm8,0x13 |
| vperm2i128 ymm8,ymm5,ymm1,0x02 |
| vperm2i128 ymm12,ymm13,ymm9,0x02 |
| vperm2i128 ymm1,ymm5,ymm1,0x13 |
| vperm2i128 ymm5,ymm13,ymm9,0x13 |
| $L$open_avx2_short: |
| mov r8,r8 |
| call poly_hash_ad_internal |
| $L$open_avx2_short_hash_and_xor_loop: |
| cmp rbx,32 |
| jb NEAR $L$open_avx2_short_tail_32 |
| sub rbx,32 |
| add r10,QWORD[((0+0))+rsi] |
| adc r11,QWORD[((8+0))+rsi] |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| add r10,QWORD[((0+16))+rsi] |
| adc r11,QWORD[((8+16))+rsi] |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| |
| vpxor ymm0,ymm0,YMMWORD[rsi] |
| vmovdqu YMMWORD[rdi],ymm0 |
| lea rsi,[32+rsi] |
| lea rdi,[32+rdi] |
| |
| vmovdqa ymm0,ymm4 |
| vmovdqa ymm4,ymm8 |
| vmovdqa ymm8,ymm12 |
| vmovdqa ymm12,ymm1 |
| vmovdqa ymm1,ymm5 |
| vmovdqa ymm5,ymm9 |
| vmovdqa ymm9,ymm13 |
| vmovdqa ymm13,ymm2 |
| vmovdqa ymm2,ymm6 |
| jmp NEAR $L$open_avx2_short_hash_and_xor_loop |
| $L$open_avx2_short_tail_32: |
| cmp rbx,16 |
| vmovdqa xmm1,xmm0 |
| jb NEAR $L$open_avx2_short_tail_32_exit |
| sub rbx,16 |
| add r10,QWORD[((0+0))+rsi] |
| adc r11,QWORD[((8+0))+rsi] |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| vpxor xmm3,xmm0,XMMWORD[rsi] |
| vmovdqu XMMWORD[rdi],xmm3 |
| lea rsi,[16+rsi] |
| lea rdi,[16+rdi] |
| vextracti128 xmm1,ymm0,1 |
| $L$open_avx2_short_tail_32_exit: |
| vzeroupper |
| jmp NEAR $L$open_sse_tail_16 |
| |
| $L$open_avx2_320: |
| vmovdqa ymm1,ymm0 |
| vmovdqa ymm2,ymm0 |
| vmovdqa ymm5,ymm4 |
| vmovdqa ymm6,ymm4 |
| vmovdqa ymm9,ymm8 |
| vmovdqa ymm10,ymm8 |
| vpaddd ymm13,ymm12,YMMWORD[$L$avx2_inc] |
| vpaddd ymm14,ymm13,YMMWORD[$L$avx2_inc] |
| vmovdqa ymm7,ymm4 |
| vmovdqa ymm11,ymm8 |
| vmovdqa YMMWORD[(160+160)+rbp],ymm12 |
| vmovdqa YMMWORD[(160+192)+rbp],ymm13 |
| vmovdqa YMMWORD[(160+224)+rbp],ymm14 |
| mov r10,10 |
| $L$open_avx2_320_rounds: |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm12,ymm12,YMMWORD[$L$rol16] |
| vpaddd ymm8,ymm8,ymm12 |
| vpxor ymm4,ymm4,ymm8 |
| vpsrld ymm3,ymm4,20 |
| vpslld ymm4,ymm4,12 |
| vpxor ymm4,ymm4,ymm3 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm12,ymm12,YMMWORD[$L$rol8] |
| vpaddd ymm8,ymm8,ymm12 |
| vpxor ymm4,ymm4,ymm8 |
| vpslld ymm3,ymm4,7 |
| vpsrld ymm4,ymm4,25 |
| vpxor ymm4,ymm4,ymm3 |
| vpalignr ymm12,ymm12,ymm12,12 |
| vpalignr ymm8,ymm8,ymm8,8 |
| vpalignr ymm4,ymm4,ymm4,4 |
| vpaddd ymm1,ymm1,ymm5 |
| vpxor ymm13,ymm13,ymm1 |
| vpshufb ymm13,ymm13,YMMWORD[$L$rol16] |
| vpaddd ymm9,ymm9,ymm13 |
| vpxor ymm5,ymm5,ymm9 |
| vpsrld ymm3,ymm5,20 |
| vpslld ymm5,ymm5,12 |
| vpxor ymm5,ymm5,ymm3 |
| vpaddd ymm1,ymm1,ymm5 |
| vpxor ymm13,ymm13,ymm1 |
| vpshufb ymm13,ymm13,YMMWORD[$L$rol8] |
| vpaddd ymm9,ymm9,ymm13 |
| vpxor ymm5,ymm5,ymm9 |
| vpslld ymm3,ymm5,7 |
| vpsrld ymm5,ymm5,25 |
| vpxor ymm5,ymm5,ymm3 |
| vpalignr ymm13,ymm13,ymm13,12 |
| vpalignr ymm9,ymm9,ymm9,8 |
| vpalignr ymm5,ymm5,ymm5,4 |
| vpaddd ymm2,ymm2,ymm6 |
| vpxor ymm14,ymm14,ymm2 |
| vpshufb ymm14,ymm14,YMMWORD[$L$rol16] |
| vpaddd ymm10,ymm10,ymm14 |
| vpxor ymm6,ymm6,ymm10 |
| vpsrld ymm3,ymm6,20 |
| vpslld ymm6,ymm6,12 |
| vpxor ymm6,ymm6,ymm3 |
| vpaddd ymm2,ymm2,ymm6 |
| vpxor ymm14,ymm14,ymm2 |
| vpshufb ymm14,ymm14,YMMWORD[$L$rol8] |
| vpaddd ymm10,ymm10,ymm14 |
| vpxor ymm6,ymm6,ymm10 |
| vpslld ymm3,ymm6,7 |
| vpsrld ymm6,ymm6,25 |
| vpxor ymm6,ymm6,ymm3 |
| vpalignr ymm14,ymm14,ymm14,12 |
| vpalignr ymm10,ymm10,ymm10,8 |
| vpalignr ymm6,ymm6,ymm6,4 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm12,ymm12,YMMWORD[$L$rol16] |
| vpaddd ymm8,ymm8,ymm12 |
| vpxor ymm4,ymm4,ymm8 |
| vpsrld ymm3,ymm4,20 |
| vpslld ymm4,ymm4,12 |
| vpxor ymm4,ymm4,ymm3 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm12,ymm12,YMMWORD[$L$rol8] |
| vpaddd ymm8,ymm8,ymm12 |
| vpxor ymm4,ymm4,ymm8 |
| vpslld ymm3,ymm4,7 |
| vpsrld ymm4,ymm4,25 |
| vpxor ymm4,ymm4,ymm3 |
| vpalignr ymm12,ymm12,ymm12,4 |
| vpalignr ymm8,ymm8,ymm8,8 |
| vpalignr ymm4,ymm4,ymm4,12 |
| vpaddd ymm1,ymm1,ymm5 |
| vpxor ymm13,ymm13,ymm1 |
| vpshufb ymm13,ymm13,YMMWORD[$L$rol16] |
| vpaddd ymm9,ymm9,ymm13 |
| vpxor ymm5,ymm5,ymm9 |
| vpsrld ymm3,ymm5,20 |
| vpslld ymm5,ymm5,12 |
| vpxor ymm5,ymm5,ymm3 |
| vpaddd ymm1,ymm1,ymm5 |
| vpxor ymm13,ymm13,ymm1 |
| vpshufb ymm13,ymm13,YMMWORD[$L$rol8] |
| vpaddd ymm9,ymm9,ymm13 |
| vpxor ymm5,ymm5,ymm9 |
| vpslld ymm3,ymm5,7 |
| vpsrld ymm5,ymm5,25 |
| vpxor ymm5,ymm5,ymm3 |
| vpalignr ymm13,ymm13,ymm13,4 |
| vpalignr ymm9,ymm9,ymm9,8 |
| vpalignr ymm5,ymm5,ymm5,12 |
| vpaddd ymm2,ymm2,ymm6 |
| vpxor ymm14,ymm14,ymm2 |
| vpshufb ymm14,ymm14,YMMWORD[$L$rol16] |
| vpaddd ymm10,ymm10,ymm14 |
| vpxor ymm6,ymm6,ymm10 |
| vpsrld ymm3,ymm6,20 |
| vpslld ymm6,ymm6,12 |
| vpxor ymm6,ymm6,ymm3 |
| vpaddd ymm2,ymm2,ymm6 |
| vpxor ymm14,ymm14,ymm2 |
| vpshufb ymm14,ymm14,YMMWORD[$L$rol8] |
| vpaddd ymm10,ymm10,ymm14 |
| vpxor ymm6,ymm6,ymm10 |
| vpslld ymm3,ymm6,7 |
| vpsrld ymm6,ymm6,25 |
| vpxor ymm6,ymm6,ymm3 |
| vpalignr ymm14,ymm14,ymm14,4 |
| vpalignr ymm10,ymm10,ymm10,8 |
| vpalignr ymm6,ymm6,ymm6,12 |
| |
| dec r10 |
| jne NEAR $L$open_avx2_320_rounds |
| vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] |
| vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] |
| vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] |
| vpaddd ymm4,ymm4,ymm7 |
| vpaddd ymm5,ymm5,ymm7 |
| vpaddd ymm6,ymm6,ymm7 |
| vpaddd ymm8,ymm8,ymm11 |
| vpaddd ymm9,ymm9,ymm11 |
| vpaddd ymm10,ymm10,ymm11 |
| vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] |
| vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] |
| vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] |
| vperm2i128 ymm3,ymm4,ymm0,0x02 |
| |
| vpand ymm3,ymm3,YMMWORD[$L$clamp] |
| vmovdqa YMMWORD[(160+0)+rbp],ymm3 |
| |
| vperm2i128 ymm0,ymm4,ymm0,0x13 |
| vperm2i128 ymm4,ymm12,ymm8,0x13 |
| vperm2i128 ymm8,ymm5,ymm1,0x02 |
| vperm2i128 ymm12,ymm13,ymm9,0x02 |
| vperm2i128 ymm1,ymm5,ymm1,0x13 |
| vperm2i128 ymm5,ymm13,ymm9,0x13 |
| vperm2i128 ymm9,ymm6,ymm2,0x02 |
| vperm2i128 ymm13,ymm14,ymm10,0x02 |
| vperm2i128 ymm2,ymm6,ymm2,0x13 |
| vperm2i128 ymm6,ymm14,ymm10,0x13 |
| jmp NEAR $L$open_avx2_short |
| $L$SEH_end_chacha20_poly1305_open_avx2: |
| |
| |
| |
| global chacha20_poly1305_seal_avx2 |
| |
| ALIGN 64 |
| chacha20_poly1305_seal_avx2: |
| mov QWORD[8+rsp],rdi ;WIN64 prologue |
| mov QWORD[16+rsp],rsi |
| mov rax,rsp |
| $L$SEH_begin_chacha20_poly1305_seal_avx2: |
| mov rdi,rcx |
| mov rsi,rdx |
| mov rdx,r8 |
| mov rcx,r9 |
| mov r8,QWORD[40+rsp] |
| mov r9,QWORD[48+rsp] |
| |
| |
| |
| _CET_ENDBR |
| push rbp |
| |
| push rbx |
| |
| push r12 |
| |
| push r13 |
| |
| push r14 |
| |
| push r15 |
| |
| |
| |
| push r9 |
| |
| sub rsp,288 + 160 + 32 |
| |
| lea rbp,[32+rsp] |
| and rbp,-32 |
| |
| movaps XMMWORD[(0+0)+rbp],xmm6 |
| movaps XMMWORD[(16+0)+rbp],xmm7 |
| movaps XMMWORD[(32+0)+rbp],xmm8 |
| movaps XMMWORD[(48+0)+rbp],xmm9 |
| movaps XMMWORD[(64+0)+rbp],xmm10 |
| movaps XMMWORD[(80+0)+rbp],xmm11 |
| movaps XMMWORD[(96+0)+rbp],xmm12 |
| movaps XMMWORD[(112+0)+rbp],xmm13 |
| movaps XMMWORD[(128+0)+rbp],xmm14 |
| movaps XMMWORD[(144+0)+rbp],xmm15 |
| |
| mov rbx,QWORD[56+r9] |
| add rbx,rdx |
| mov QWORD[((0+160+32))+rbp],r8 |
| mov QWORD[((8+160+32))+rbp],rbx |
| mov rbx,rdx |
| |
| vzeroupper |
| vmovdqa ymm0,YMMWORD[$L$chacha20_consts] |
| vbroadcasti128 ymm4,XMMWORD[r9] |
| vbroadcasti128 ymm8,XMMWORD[16+r9] |
| vbroadcasti128 ymm12,XMMWORD[32+r9] |
| vpaddd ymm12,ymm12,YMMWORD[$L$avx2_init] |
| cmp rbx,6*32 |
| jbe NEAR $L$seal_avx2_192 |
| cmp rbx,10*32 |
| jbe NEAR $L$seal_avx2_320 |
| vmovdqa ymm1,ymm0 |
| vmovdqa ymm2,ymm0 |
| vmovdqa ymm3,ymm0 |
| vmovdqa ymm5,ymm4 |
| vmovdqa ymm6,ymm4 |
| vmovdqa ymm7,ymm4 |
| vmovdqa YMMWORD[(160+64)+rbp],ymm4 |
| vmovdqa ymm9,ymm8 |
| vmovdqa ymm10,ymm8 |
| vmovdqa ymm11,ymm8 |
| vmovdqa YMMWORD[(160+96)+rbp],ymm8 |
| vmovdqa ymm15,ymm12 |
| vpaddd ymm14,ymm15,YMMWORD[$L$avx2_inc] |
| vpaddd ymm13,ymm14,YMMWORD[$L$avx2_inc] |
| vpaddd ymm12,ymm13,YMMWORD[$L$avx2_inc] |
| vmovdqa YMMWORD[(160+160)+rbp],ymm12 |
| vmovdqa YMMWORD[(160+192)+rbp],ymm13 |
| vmovdqa YMMWORD[(160+224)+rbp],ymm14 |
| vmovdqa YMMWORD[(160+256)+rbp],ymm15 |
| mov r10,10 |
| $L$seal_avx2_init_rounds: |
| vmovdqa YMMWORD[(160+128)+rbp],ymm8 |
| vmovdqa ymm8,YMMWORD[$L$rol16] |
| vpaddd ymm3,ymm3,ymm7 |
| vpaddd ymm2,ymm2,ymm6 |
| vpaddd ymm1,ymm1,ymm5 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm15,ymm15,ymm3 |
| vpxor ymm14,ymm14,ymm2 |
| vpxor ymm13,ymm13,ymm1 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm15,ymm15,ymm8 |
| vpshufb ymm14,ymm14,ymm8 |
| vpshufb ymm13,ymm13,ymm8 |
| vpshufb ymm12,ymm12,ymm8 |
| vpaddd ymm11,ymm11,ymm15 |
| vpaddd ymm10,ymm10,ymm14 |
| vpaddd ymm9,ymm9,ymm13 |
| vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] |
| vpxor ymm7,ymm7,ymm11 |
| vpxor ymm6,ymm6,ymm10 |
| vpxor ymm5,ymm5,ymm9 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa YMMWORD[(160+128)+rbp],ymm8 |
| vpsrld ymm8,ymm7,20 |
| vpslld ymm7,ymm7,32-20 |
| vpxor ymm7,ymm7,ymm8 |
| vpsrld ymm8,ymm6,20 |
| vpslld ymm6,ymm6,32-20 |
| vpxor ymm6,ymm6,ymm8 |
| vpsrld ymm8,ymm5,20 |
| vpslld ymm5,ymm5,32-20 |
| vpxor ymm5,ymm5,ymm8 |
| vpsrld ymm8,ymm4,20 |
| vpslld ymm4,ymm4,32-20 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa ymm8,YMMWORD[$L$rol8] |
| vpaddd ymm3,ymm3,ymm7 |
| vpaddd ymm2,ymm2,ymm6 |
| vpaddd ymm1,ymm1,ymm5 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm15,ymm15,ymm3 |
| vpxor ymm14,ymm14,ymm2 |
| vpxor ymm13,ymm13,ymm1 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm15,ymm15,ymm8 |
| vpshufb ymm14,ymm14,ymm8 |
| vpshufb ymm13,ymm13,ymm8 |
| vpshufb ymm12,ymm12,ymm8 |
| vpaddd ymm11,ymm11,ymm15 |
| vpaddd ymm10,ymm10,ymm14 |
| vpaddd ymm9,ymm9,ymm13 |
| vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] |
| vpxor ymm7,ymm7,ymm11 |
| vpxor ymm6,ymm6,ymm10 |
| vpxor ymm5,ymm5,ymm9 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa YMMWORD[(160+128)+rbp],ymm8 |
| vpsrld ymm8,ymm7,25 |
| vpslld ymm7,ymm7,32-25 |
| vpxor ymm7,ymm7,ymm8 |
| vpsrld ymm8,ymm6,25 |
| vpslld ymm6,ymm6,32-25 |
| vpxor ymm6,ymm6,ymm8 |
| vpsrld ymm8,ymm5,25 |
| vpslld ymm5,ymm5,32-25 |
| vpxor ymm5,ymm5,ymm8 |
| vpsrld ymm8,ymm4,25 |
| vpslld ymm4,ymm4,32-25 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa ymm8,YMMWORD[((160+128))+rbp] |
| vpalignr ymm7,ymm7,ymm7,4 |
| vpalignr ymm11,ymm11,ymm11,8 |
| vpalignr ymm15,ymm15,ymm15,12 |
| vpalignr ymm6,ymm6,ymm6,4 |
| vpalignr ymm10,ymm10,ymm10,8 |
| vpalignr ymm14,ymm14,ymm14,12 |
| vpalignr ymm5,ymm5,ymm5,4 |
| vpalignr ymm9,ymm9,ymm9,8 |
| vpalignr ymm13,ymm13,ymm13,12 |
| vpalignr ymm4,ymm4,ymm4,4 |
| vpalignr ymm8,ymm8,ymm8,8 |
| vpalignr ymm12,ymm12,ymm12,12 |
| vmovdqa YMMWORD[(160+128)+rbp],ymm8 |
| vmovdqa ymm8,YMMWORD[$L$rol16] |
| vpaddd ymm3,ymm3,ymm7 |
| vpaddd ymm2,ymm2,ymm6 |
| vpaddd ymm1,ymm1,ymm5 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm15,ymm15,ymm3 |
| vpxor ymm14,ymm14,ymm2 |
| vpxor ymm13,ymm13,ymm1 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm15,ymm15,ymm8 |
| vpshufb ymm14,ymm14,ymm8 |
| vpshufb ymm13,ymm13,ymm8 |
| vpshufb ymm12,ymm12,ymm8 |
| vpaddd ymm11,ymm11,ymm15 |
| vpaddd ymm10,ymm10,ymm14 |
| vpaddd ymm9,ymm9,ymm13 |
| vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] |
| vpxor ymm7,ymm7,ymm11 |
| vpxor ymm6,ymm6,ymm10 |
| vpxor ymm5,ymm5,ymm9 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa YMMWORD[(160+128)+rbp],ymm8 |
| vpsrld ymm8,ymm7,20 |
| vpslld ymm7,ymm7,32-20 |
| vpxor ymm7,ymm7,ymm8 |
| vpsrld ymm8,ymm6,20 |
| vpslld ymm6,ymm6,32-20 |
| vpxor ymm6,ymm6,ymm8 |
| vpsrld ymm8,ymm5,20 |
| vpslld ymm5,ymm5,32-20 |
| vpxor ymm5,ymm5,ymm8 |
| vpsrld ymm8,ymm4,20 |
| vpslld ymm4,ymm4,32-20 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa ymm8,YMMWORD[$L$rol8] |
| vpaddd ymm3,ymm3,ymm7 |
| vpaddd ymm2,ymm2,ymm6 |
| vpaddd ymm1,ymm1,ymm5 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm15,ymm15,ymm3 |
| vpxor ymm14,ymm14,ymm2 |
| vpxor ymm13,ymm13,ymm1 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm15,ymm15,ymm8 |
| vpshufb ymm14,ymm14,ymm8 |
| vpshufb ymm13,ymm13,ymm8 |
| vpshufb ymm12,ymm12,ymm8 |
| vpaddd ymm11,ymm11,ymm15 |
| vpaddd ymm10,ymm10,ymm14 |
| vpaddd ymm9,ymm9,ymm13 |
| vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] |
| vpxor ymm7,ymm7,ymm11 |
| vpxor ymm6,ymm6,ymm10 |
| vpxor ymm5,ymm5,ymm9 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa YMMWORD[(160+128)+rbp],ymm8 |
| vpsrld ymm8,ymm7,25 |
| vpslld ymm7,ymm7,32-25 |
| vpxor ymm7,ymm7,ymm8 |
| vpsrld ymm8,ymm6,25 |
| vpslld ymm6,ymm6,32-25 |
| vpxor ymm6,ymm6,ymm8 |
| vpsrld ymm8,ymm5,25 |
| vpslld ymm5,ymm5,32-25 |
| vpxor ymm5,ymm5,ymm8 |
| vpsrld ymm8,ymm4,25 |
| vpslld ymm4,ymm4,32-25 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa ymm8,YMMWORD[((160+128))+rbp] |
| vpalignr ymm7,ymm7,ymm7,12 |
| vpalignr ymm11,ymm11,ymm11,8 |
| vpalignr ymm15,ymm15,ymm15,4 |
| vpalignr ymm6,ymm6,ymm6,12 |
| vpalignr ymm10,ymm10,ymm10,8 |
| vpalignr ymm14,ymm14,ymm14,4 |
| vpalignr ymm5,ymm5,ymm5,12 |
| vpalignr ymm9,ymm9,ymm9,8 |
| vpalignr ymm13,ymm13,ymm13,4 |
| vpalignr ymm4,ymm4,ymm4,12 |
| vpalignr ymm8,ymm8,ymm8,8 |
| vpalignr ymm12,ymm12,ymm12,4 |
| |
| dec r10 |
| jnz NEAR $L$seal_avx2_init_rounds |
| vpaddd ymm3,ymm3,YMMWORD[$L$chacha20_consts] |
| vpaddd ymm7,ymm7,YMMWORD[((160+64))+rbp] |
| vpaddd ymm11,ymm11,YMMWORD[((160+96))+rbp] |
| vpaddd ymm15,ymm15,YMMWORD[((160+256))+rbp] |
| vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] |
| vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp] |
| vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp] |
| vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] |
| vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] |
| vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] |
| vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] |
| vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] |
| vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] |
| vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] |
| vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] |
| vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] |
| |
| vperm2i128 ymm11,ymm15,ymm11,0x13 |
| vperm2i128 ymm15,ymm7,ymm3,0x02 |
| vperm2i128 ymm3,ymm7,ymm3,0x13 |
| vpand ymm15,ymm15,YMMWORD[$L$clamp] |
| vmovdqa YMMWORD[(160+0)+rbp],ymm15 |
| mov r8,r8 |
| call poly_hash_ad_internal |
| |
| vpxor ymm3,ymm3,YMMWORD[rsi] |
| vpxor ymm11,ymm11,YMMWORD[32+rsi] |
| vmovdqu YMMWORD[rdi],ymm3 |
| vmovdqu YMMWORD[32+rdi],ymm11 |
| vperm2i128 ymm15,ymm6,ymm2,0x02 |
| vperm2i128 ymm6,ymm6,ymm2,0x13 |
| vperm2i128 ymm2,ymm14,ymm10,0x02 |
| vperm2i128 ymm10,ymm14,ymm10,0x13 |
| vpxor ymm15,ymm15,YMMWORD[((0+64))+rsi] |
| vpxor ymm2,ymm2,YMMWORD[((32+64))+rsi] |
| vpxor ymm6,ymm6,YMMWORD[((64+64))+rsi] |
| vpxor ymm10,ymm10,YMMWORD[((96+64))+rsi] |
| vmovdqu YMMWORD[(0+64)+rdi],ymm15 |
| vmovdqu YMMWORD[(32+64)+rdi],ymm2 |
| vmovdqu YMMWORD[(64+64)+rdi],ymm6 |
| vmovdqu YMMWORD[(96+64)+rdi],ymm10 |
| vperm2i128 ymm15,ymm5,ymm1,0x02 |
| vperm2i128 ymm5,ymm5,ymm1,0x13 |
| vperm2i128 ymm1,ymm13,ymm9,0x02 |
| vperm2i128 ymm9,ymm13,ymm9,0x13 |
| vpxor ymm15,ymm15,YMMWORD[((0+192))+rsi] |
| vpxor ymm1,ymm1,YMMWORD[((32+192))+rsi] |
| vpxor ymm5,ymm5,YMMWORD[((64+192))+rsi] |
| vpxor ymm9,ymm9,YMMWORD[((96+192))+rsi] |
| vmovdqu YMMWORD[(0+192)+rdi],ymm15 |
| vmovdqu YMMWORD[(32+192)+rdi],ymm1 |
| vmovdqu YMMWORD[(64+192)+rdi],ymm5 |
| vmovdqu YMMWORD[(96+192)+rdi],ymm9 |
| vperm2i128 ymm15,ymm4,ymm0,0x13 |
| vperm2i128 ymm0,ymm4,ymm0,0x02 |
| vperm2i128 ymm4,ymm12,ymm8,0x02 |
| vperm2i128 ymm12,ymm12,ymm8,0x13 |
| vmovdqa ymm8,ymm15 |
| |
| lea rsi,[320+rsi] |
| sub rbx,10*32 |
| mov rcx,10*32 |
| cmp rbx,4*32 |
| jbe NEAR $L$seal_avx2_short_hash_remainder |
| vpxor ymm0,ymm0,YMMWORD[rsi] |
| vpxor ymm4,ymm4,YMMWORD[32+rsi] |
| vpxor ymm8,ymm8,YMMWORD[64+rsi] |
| vpxor ymm12,ymm12,YMMWORD[96+rsi] |
| vmovdqu YMMWORD[320+rdi],ymm0 |
| vmovdqu YMMWORD[352+rdi],ymm4 |
| vmovdqu YMMWORD[384+rdi],ymm8 |
| vmovdqu YMMWORD[416+rdi],ymm12 |
| lea rsi,[128+rsi] |
| sub rbx,4*32 |
| mov rcx,8 |
| mov r8,2 |
| cmp rbx,4*32 |
| jbe NEAR $L$seal_avx2_tail_128 |
| cmp rbx,8*32 |
| jbe NEAR $L$seal_avx2_tail_256 |
| cmp rbx,12*32 |
| jbe NEAR $L$seal_avx2_tail_384 |
| cmp rbx,16*32 |
| jbe NEAR $L$seal_avx2_tail_512 |
| vmovdqa ymm0,YMMWORD[$L$chacha20_consts] |
| vmovdqa ymm4,YMMWORD[((160+64))+rbp] |
| vmovdqa ymm8,YMMWORD[((160+96))+rbp] |
| vmovdqa ymm1,ymm0 |
| vmovdqa ymm5,ymm4 |
| vmovdqa ymm9,ymm8 |
| vmovdqa ymm2,ymm0 |
| vmovdqa ymm6,ymm4 |
| vmovdqa ymm10,ymm8 |
| vmovdqa ymm3,ymm0 |
| vmovdqa ymm7,ymm4 |
| vmovdqa ymm11,ymm8 |
| vmovdqa ymm12,YMMWORD[$L$avx2_inc] |
| vpaddd ymm15,ymm12,YMMWORD[((160+160))+rbp] |
| vpaddd ymm14,ymm12,ymm15 |
| vpaddd ymm13,ymm12,ymm14 |
| vpaddd ymm12,ymm12,ymm13 |
| vmovdqa YMMWORD[(160+256)+rbp],ymm15 |
| vmovdqa YMMWORD[(160+224)+rbp],ymm14 |
| vmovdqa YMMWORD[(160+192)+rbp],ymm13 |
| vmovdqa YMMWORD[(160+160)+rbp],ymm12 |
| vmovdqa YMMWORD[(160+128)+rbp],ymm8 |
| vmovdqa ymm8,YMMWORD[$L$rol16] |
| vpaddd ymm3,ymm3,ymm7 |
| vpaddd ymm2,ymm2,ymm6 |
| vpaddd ymm1,ymm1,ymm5 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm15,ymm15,ymm3 |
| vpxor ymm14,ymm14,ymm2 |
| vpxor ymm13,ymm13,ymm1 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm15,ymm15,ymm8 |
| vpshufb ymm14,ymm14,ymm8 |
| vpshufb ymm13,ymm13,ymm8 |
| vpshufb ymm12,ymm12,ymm8 |
| vpaddd ymm11,ymm11,ymm15 |
| vpaddd ymm10,ymm10,ymm14 |
| vpaddd ymm9,ymm9,ymm13 |
| vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] |
| vpxor ymm7,ymm7,ymm11 |
| vpxor ymm6,ymm6,ymm10 |
| vpxor ymm5,ymm5,ymm9 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa YMMWORD[(160+128)+rbp],ymm8 |
| vpsrld ymm8,ymm7,20 |
| vpslld ymm7,ymm7,32-20 |
| vpxor ymm7,ymm7,ymm8 |
| vpsrld ymm8,ymm6,20 |
| vpslld ymm6,ymm6,32-20 |
| vpxor ymm6,ymm6,ymm8 |
| vpsrld ymm8,ymm5,20 |
| vpslld ymm5,ymm5,32-20 |
| vpxor ymm5,ymm5,ymm8 |
| vpsrld ymm8,ymm4,20 |
| vpslld ymm4,ymm4,32-20 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa ymm8,YMMWORD[$L$rol8] |
| vpaddd ymm3,ymm3,ymm7 |
| vpaddd ymm2,ymm2,ymm6 |
| vpaddd ymm1,ymm1,ymm5 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm15,ymm15,ymm3 |
| vpxor ymm14,ymm14,ymm2 |
| vpxor ymm13,ymm13,ymm1 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm15,ymm15,ymm8 |
| vpshufb ymm14,ymm14,ymm8 |
| vpshufb ymm13,ymm13,ymm8 |
| vpshufb ymm12,ymm12,ymm8 |
| vpaddd ymm11,ymm11,ymm15 |
| vpaddd ymm10,ymm10,ymm14 |
| vpaddd ymm9,ymm9,ymm13 |
| vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] |
| vpxor ymm7,ymm7,ymm11 |
| vpxor ymm6,ymm6,ymm10 |
| vpxor ymm5,ymm5,ymm9 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa YMMWORD[(160+128)+rbp],ymm8 |
| vpsrld ymm8,ymm7,25 |
| vpslld ymm7,ymm7,32-25 |
| vpxor ymm7,ymm7,ymm8 |
| vpsrld ymm8,ymm6,25 |
| vpslld ymm6,ymm6,32-25 |
| vpxor ymm6,ymm6,ymm8 |
| vpsrld ymm8,ymm5,25 |
| vpslld ymm5,ymm5,32-25 |
| vpxor ymm5,ymm5,ymm8 |
| vpsrld ymm8,ymm4,25 |
| vpslld ymm4,ymm4,32-25 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa ymm8,YMMWORD[((160+128))+rbp] |
| vpalignr ymm7,ymm7,ymm7,4 |
| vpalignr ymm11,ymm11,ymm11,8 |
| vpalignr ymm15,ymm15,ymm15,12 |
| vpalignr ymm6,ymm6,ymm6,4 |
| vpalignr ymm10,ymm10,ymm10,8 |
| vpalignr ymm14,ymm14,ymm14,12 |
| vpalignr ymm5,ymm5,ymm5,4 |
| vpalignr ymm9,ymm9,ymm9,8 |
| vpalignr ymm13,ymm13,ymm13,12 |
| vpalignr ymm4,ymm4,ymm4,4 |
| vpalignr ymm8,ymm8,ymm8,8 |
| vpalignr ymm12,ymm12,ymm12,12 |
| vmovdqa YMMWORD[(160+128)+rbp],ymm8 |
| vmovdqa ymm8,YMMWORD[$L$rol16] |
| vpaddd ymm3,ymm3,ymm7 |
| vpaddd ymm2,ymm2,ymm6 |
| vpaddd ymm1,ymm1,ymm5 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm15,ymm15,ymm3 |
| vpxor ymm14,ymm14,ymm2 |
| vpxor ymm13,ymm13,ymm1 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm15,ymm15,ymm8 |
| vpshufb ymm14,ymm14,ymm8 |
| vpshufb ymm13,ymm13,ymm8 |
| vpshufb ymm12,ymm12,ymm8 |
| vpaddd ymm11,ymm11,ymm15 |
| vpaddd ymm10,ymm10,ymm14 |
| vpaddd ymm9,ymm9,ymm13 |
| vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] |
| vpxor ymm7,ymm7,ymm11 |
| vpxor ymm6,ymm6,ymm10 |
| vpxor ymm5,ymm5,ymm9 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa YMMWORD[(160+128)+rbp],ymm8 |
| vpsrld ymm8,ymm7,20 |
| vpslld ymm7,ymm7,32-20 |
| vpxor ymm7,ymm7,ymm8 |
| vpsrld ymm8,ymm6,20 |
| vpslld ymm6,ymm6,32-20 |
| vpxor ymm6,ymm6,ymm8 |
| vpsrld ymm8,ymm5,20 |
| vpslld ymm5,ymm5,32-20 |
| vpxor ymm5,ymm5,ymm8 |
| vpsrld ymm8,ymm4,20 |
| vpslld ymm4,ymm4,32-20 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa ymm8,YMMWORD[$L$rol8] |
| vpaddd ymm3,ymm3,ymm7 |
| vpaddd ymm2,ymm2,ymm6 |
| vpaddd ymm1,ymm1,ymm5 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm15,ymm15,ymm3 |
| vpxor ymm14,ymm14,ymm2 |
| vpxor ymm13,ymm13,ymm1 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm15,ymm15,ymm8 |
| vpshufb ymm14,ymm14,ymm8 |
| vpshufb ymm13,ymm13,ymm8 |
| vpshufb ymm12,ymm12,ymm8 |
| vpaddd ymm11,ymm11,ymm15 |
| vpaddd ymm10,ymm10,ymm14 |
| vpaddd ymm9,ymm9,ymm13 |
| vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] |
| vpxor ymm7,ymm7,ymm11 |
| vpxor ymm6,ymm6,ymm10 |
| vpxor ymm5,ymm5,ymm9 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa YMMWORD[(160+128)+rbp],ymm8 |
| vpsrld ymm8,ymm7,25 |
| vpslld ymm7,ymm7,32-25 |
| vpxor ymm7,ymm7,ymm8 |
| vpsrld ymm8,ymm6,25 |
| vpslld ymm6,ymm6,32-25 |
| vpxor ymm6,ymm6,ymm8 |
| vpsrld ymm8,ymm5,25 |
| vpslld ymm5,ymm5,32-25 |
| vpxor ymm5,ymm5,ymm8 |
| vpsrld ymm8,ymm4,25 |
| vpslld ymm4,ymm4,32-25 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa ymm8,YMMWORD[((160+128))+rbp] |
| vpalignr ymm7,ymm7,ymm7,12 |
| vpalignr ymm11,ymm11,ymm11,8 |
| vpalignr ymm15,ymm15,ymm15,4 |
| vpalignr ymm6,ymm6,ymm6,12 |
| vpalignr ymm10,ymm10,ymm10,8 |
| vpalignr ymm14,ymm14,ymm14,4 |
| vpalignr ymm5,ymm5,ymm5,12 |
| vpalignr ymm9,ymm9,ymm9,8 |
| vpalignr ymm13,ymm13,ymm13,4 |
| vpalignr ymm4,ymm4,ymm4,12 |
| vpalignr ymm8,ymm8,ymm8,8 |
| vpalignr ymm12,ymm12,ymm12,4 |
| vmovdqa YMMWORD[(160+128)+rbp],ymm8 |
| vmovdqa ymm8,YMMWORD[$L$rol16] |
| vpaddd ymm3,ymm3,ymm7 |
| vpaddd ymm2,ymm2,ymm6 |
| vpaddd ymm1,ymm1,ymm5 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm15,ymm15,ymm3 |
| vpxor ymm14,ymm14,ymm2 |
| vpxor ymm13,ymm13,ymm1 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm15,ymm15,ymm8 |
| vpshufb ymm14,ymm14,ymm8 |
| vpshufb ymm13,ymm13,ymm8 |
| vpshufb ymm12,ymm12,ymm8 |
| vpaddd ymm11,ymm11,ymm15 |
| vpaddd ymm10,ymm10,ymm14 |
| vpaddd ymm9,ymm9,ymm13 |
| vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] |
| vpxor ymm7,ymm7,ymm11 |
| vpxor ymm6,ymm6,ymm10 |
| vpxor ymm5,ymm5,ymm9 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa YMMWORD[(160+128)+rbp],ymm8 |
| vpsrld ymm8,ymm7,20 |
| vpslld ymm7,ymm7,32-20 |
| vpxor ymm7,ymm7,ymm8 |
| vpsrld ymm8,ymm6,20 |
| vpslld ymm6,ymm6,32-20 |
| vpxor ymm6,ymm6,ymm8 |
| vpsrld ymm8,ymm5,20 |
| vpslld ymm5,ymm5,32-20 |
| vpxor ymm5,ymm5,ymm8 |
| vpsrld ymm8,ymm4,20 |
| vpslld ymm4,ymm4,32-20 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa ymm8,YMMWORD[$L$rol8] |
| vpaddd ymm3,ymm3,ymm7 |
| vpaddd ymm2,ymm2,ymm6 |
| vpaddd ymm1,ymm1,ymm5 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm15,ymm15,ymm3 |
| |
| sub rdi,16 |
| mov rcx,9 |
| jmp NEAR $L$seal_avx2_main_loop_rounds_entry |
| ALIGN 32 |
| $L$seal_avx2_main_loop: |
| vmovdqa ymm0,YMMWORD[$L$chacha20_consts] |
| vmovdqa ymm4,YMMWORD[((160+64))+rbp] |
| vmovdqa ymm8,YMMWORD[((160+96))+rbp] |
| vmovdqa ymm1,ymm0 |
| vmovdqa ymm5,ymm4 |
| vmovdqa ymm9,ymm8 |
| vmovdqa ymm2,ymm0 |
| vmovdqa ymm6,ymm4 |
| vmovdqa ymm10,ymm8 |
| vmovdqa ymm3,ymm0 |
| vmovdqa ymm7,ymm4 |
| vmovdqa ymm11,ymm8 |
| vmovdqa ymm12,YMMWORD[$L$avx2_inc] |
| vpaddd ymm15,ymm12,YMMWORD[((160+160))+rbp] |
| vpaddd ymm14,ymm12,ymm15 |
| vpaddd ymm13,ymm12,ymm14 |
| vpaddd ymm12,ymm12,ymm13 |
| vmovdqa YMMWORD[(160+256)+rbp],ymm15 |
| vmovdqa YMMWORD[(160+224)+rbp],ymm14 |
| vmovdqa YMMWORD[(160+192)+rbp],ymm13 |
| vmovdqa YMMWORD[(160+160)+rbp],ymm12 |
| |
| mov rcx,10 |
| ALIGN 32 |
| $L$seal_avx2_main_loop_rounds: |
| add r10,QWORD[((0+0))+rdi] |
| adc r11,QWORD[((8+0))+rdi] |
| adc r12,1 |
| vmovdqa YMMWORD[(160+128)+rbp],ymm8 |
| vmovdqa ymm8,YMMWORD[$L$rol16] |
| vpaddd ymm3,ymm3,ymm7 |
| vpaddd ymm2,ymm2,ymm6 |
| vpaddd ymm1,ymm1,ymm5 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm15,ymm15,ymm3 |
| vpxor ymm14,ymm14,ymm2 |
| vpxor ymm13,ymm13,ymm1 |
| vpxor ymm12,ymm12,ymm0 |
| mov rdx,QWORD[((0+160+0))+rbp] |
| mov r15,rdx |
| mulx r14,r13,r10 |
| mulx rdx,rax,r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| vpshufb ymm15,ymm15,ymm8 |
| vpshufb ymm14,ymm14,ymm8 |
| vpshufb ymm13,ymm13,ymm8 |
| vpshufb ymm12,ymm12,ymm8 |
| vpaddd ymm11,ymm11,ymm15 |
| vpaddd ymm10,ymm10,ymm14 |
| vpaddd ymm9,ymm9,ymm13 |
| vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] |
| vpxor ymm7,ymm7,ymm11 |
| mov rdx,QWORD[((8+160+0))+rbp] |
| mulx rax,r10,r10 |
| add r14,r10 |
| mulx r9,r11,r11 |
| adc r15,r11 |
| adc r9,0 |
| imul rdx,r12 |
| vpxor ymm6,ymm6,ymm10 |
| vpxor ymm5,ymm5,ymm9 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa YMMWORD[(160+128)+rbp],ymm8 |
| vpsrld ymm8,ymm7,20 |
| vpslld ymm7,ymm7,32-20 |
| vpxor ymm7,ymm7,ymm8 |
| vpsrld ymm8,ymm6,20 |
| vpslld ymm6,ymm6,32-20 |
| vpxor ymm6,ymm6,ymm8 |
| vpsrld ymm8,ymm5,20 |
| vpslld ymm5,ymm5,32-20 |
| add r15,rax |
| adc r9,rdx |
| vpxor ymm5,ymm5,ymm8 |
| vpsrld ymm8,ymm4,20 |
| vpslld ymm4,ymm4,32-20 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa ymm8,YMMWORD[$L$rol8] |
| vpaddd ymm3,ymm3,ymm7 |
| vpaddd ymm2,ymm2,ymm6 |
| vpaddd ymm1,ymm1,ymm5 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm15,ymm15,ymm3 |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| $L$seal_avx2_main_loop_rounds_entry: |
| vpxor ymm14,ymm14,ymm2 |
| vpxor ymm13,ymm13,ymm1 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm15,ymm15,ymm8 |
| vpshufb ymm14,ymm14,ymm8 |
| vpshufb ymm13,ymm13,ymm8 |
| vpshufb ymm12,ymm12,ymm8 |
| vpaddd ymm11,ymm11,ymm15 |
| vpaddd ymm10,ymm10,ymm14 |
| add r10,QWORD[((0+16))+rdi] |
| adc r11,QWORD[((8+16))+rdi] |
| adc r12,1 |
| vpaddd ymm9,ymm9,ymm13 |
| vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] |
| vpxor ymm7,ymm7,ymm11 |
| vpxor ymm6,ymm6,ymm10 |
| vpxor ymm5,ymm5,ymm9 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa YMMWORD[(160+128)+rbp],ymm8 |
| vpsrld ymm8,ymm7,25 |
| mov rdx,QWORD[((0+160+0))+rbp] |
| mov r15,rdx |
| mulx r14,r13,r10 |
| mulx rdx,rax,r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| vpslld ymm7,ymm7,32-25 |
| vpxor ymm7,ymm7,ymm8 |
| vpsrld ymm8,ymm6,25 |
| vpslld ymm6,ymm6,32-25 |
| vpxor ymm6,ymm6,ymm8 |
| vpsrld ymm8,ymm5,25 |
| vpslld ymm5,ymm5,32-25 |
| vpxor ymm5,ymm5,ymm8 |
| vpsrld ymm8,ymm4,25 |
| vpslld ymm4,ymm4,32-25 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa ymm8,YMMWORD[((160+128))+rbp] |
| vpalignr ymm7,ymm7,ymm7,4 |
| vpalignr ymm11,ymm11,ymm11,8 |
| vpalignr ymm15,ymm15,ymm15,12 |
| vpalignr ymm6,ymm6,ymm6,4 |
| vpalignr ymm10,ymm10,ymm10,8 |
| vpalignr ymm14,ymm14,ymm14,12 |
| mov rdx,QWORD[((8+160+0))+rbp] |
| mulx rax,r10,r10 |
| add r14,r10 |
| mulx r9,r11,r11 |
| adc r15,r11 |
| adc r9,0 |
| imul rdx,r12 |
| vpalignr ymm5,ymm5,ymm5,4 |
| vpalignr ymm9,ymm9,ymm9,8 |
| vpalignr ymm13,ymm13,ymm13,12 |
| vpalignr ymm4,ymm4,ymm4,4 |
| vpalignr ymm8,ymm8,ymm8,8 |
| vpalignr ymm12,ymm12,ymm12,12 |
| vmovdqa YMMWORD[(160+128)+rbp],ymm8 |
| vmovdqa ymm8,YMMWORD[$L$rol16] |
| vpaddd ymm3,ymm3,ymm7 |
| vpaddd ymm2,ymm2,ymm6 |
| vpaddd ymm1,ymm1,ymm5 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm15,ymm15,ymm3 |
| vpxor ymm14,ymm14,ymm2 |
| vpxor ymm13,ymm13,ymm1 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm15,ymm15,ymm8 |
| vpshufb ymm14,ymm14,ymm8 |
| add r15,rax |
| adc r9,rdx |
| vpshufb ymm13,ymm13,ymm8 |
| vpshufb ymm12,ymm12,ymm8 |
| vpaddd ymm11,ymm11,ymm15 |
| vpaddd ymm10,ymm10,ymm14 |
| vpaddd ymm9,ymm9,ymm13 |
| vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] |
| vpxor ymm7,ymm7,ymm11 |
| vpxor ymm6,ymm6,ymm10 |
| vpxor ymm5,ymm5,ymm9 |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa YMMWORD[(160+128)+rbp],ymm8 |
| vpsrld ymm8,ymm7,20 |
| vpslld ymm7,ymm7,32-20 |
| vpxor ymm7,ymm7,ymm8 |
| vpsrld ymm8,ymm6,20 |
| vpslld ymm6,ymm6,32-20 |
| vpxor ymm6,ymm6,ymm8 |
| add r10,QWORD[((0+32))+rdi] |
| adc r11,QWORD[((8+32))+rdi] |
| adc r12,1 |
| |
| lea rdi,[48+rdi] |
| vpsrld ymm8,ymm5,20 |
| vpslld ymm5,ymm5,32-20 |
| vpxor ymm5,ymm5,ymm8 |
| vpsrld ymm8,ymm4,20 |
| vpslld ymm4,ymm4,32-20 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa ymm8,YMMWORD[$L$rol8] |
| vpaddd ymm3,ymm3,ymm7 |
| vpaddd ymm2,ymm2,ymm6 |
| vpaddd ymm1,ymm1,ymm5 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm15,ymm15,ymm3 |
| vpxor ymm14,ymm14,ymm2 |
| vpxor ymm13,ymm13,ymm1 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm15,ymm15,ymm8 |
| vpshufb ymm14,ymm14,ymm8 |
| vpshufb ymm13,ymm13,ymm8 |
| mov rdx,QWORD[((0+160+0))+rbp] |
| mov r15,rdx |
| mulx r14,r13,r10 |
| mulx rdx,rax,r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| vpshufb ymm12,ymm12,ymm8 |
| vpaddd ymm11,ymm11,ymm15 |
| vpaddd ymm10,ymm10,ymm14 |
| vpaddd ymm9,ymm9,ymm13 |
| vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] |
| vpxor ymm7,ymm7,ymm11 |
| vpxor ymm6,ymm6,ymm10 |
| vpxor ymm5,ymm5,ymm9 |
| mov rdx,QWORD[((8+160+0))+rbp] |
| mulx rax,r10,r10 |
| add r14,r10 |
| mulx r9,r11,r11 |
| adc r15,r11 |
| adc r9,0 |
| imul rdx,r12 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa YMMWORD[(160+128)+rbp],ymm8 |
| vpsrld ymm8,ymm7,25 |
| vpslld ymm7,ymm7,32-25 |
| vpxor ymm7,ymm7,ymm8 |
| vpsrld ymm8,ymm6,25 |
| vpslld ymm6,ymm6,32-25 |
| vpxor ymm6,ymm6,ymm8 |
| add r15,rax |
| adc r9,rdx |
| vpsrld ymm8,ymm5,25 |
| vpslld ymm5,ymm5,32-25 |
| vpxor ymm5,ymm5,ymm8 |
| vpsrld ymm8,ymm4,25 |
| vpslld ymm4,ymm4,32-25 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa ymm8,YMMWORD[((160+128))+rbp] |
| vpalignr ymm7,ymm7,ymm7,12 |
| vpalignr ymm11,ymm11,ymm11,8 |
| vpalignr ymm15,ymm15,ymm15,4 |
| vpalignr ymm6,ymm6,ymm6,12 |
| vpalignr ymm10,ymm10,ymm10,8 |
| vpalignr ymm14,ymm14,ymm14,4 |
| vpalignr ymm5,ymm5,ymm5,12 |
| vpalignr ymm9,ymm9,ymm9,8 |
| vpalignr ymm13,ymm13,ymm13,4 |
| vpalignr ymm4,ymm4,ymm4,12 |
| vpalignr ymm8,ymm8,ymm8,8 |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| vpalignr ymm12,ymm12,ymm12,4 |
| |
| dec rcx |
| jne NEAR $L$seal_avx2_main_loop_rounds |
| vpaddd ymm3,ymm3,YMMWORD[$L$chacha20_consts] |
| vpaddd ymm7,ymm7,YMMWORD[((160+64))+rbp] |
| vpaddd ymm11,ymm11,YMMWORD[((160+96))+rbp] |
| vpaddd ymm15,ymm15,YMMWORD[((160+256))+rbp] |
| vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] |
| vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp] |
| vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp] |
| vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] |
| vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] |
| vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] |
| vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] |
| vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] |
| vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] |
| vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] |
| vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] |
| vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] |
| |
| vmovdqa YMMWORD[(160+128)+rbp],ymm0 |
| add r10,QWORD[((0+0))+rdi] |
| adc r11,QWORD[((8+0))+rdi] |
| adc r12,1 |
| mov rdx,QWORD[((0+160+0))+rbp] |
| mov r15,rdx |
| mulx r14,r13,r10 |
| mulx rdx,rax,r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rdx,QWORD[((8+160+0))+rbp] |
| mulx rax,r10,r10 |
| add r14,r10 |
| mulx r9,r11,r11 |
| adc r15,r11 |
| adc r9,0 |
| imul rdx,r12 |
| add r15,rax |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| add r10,QWORD[((0+16))+rdi] |
| adc r11,QWORD[((8+16))+rdi] |
| adc r12,1 |
| mov rdx,QWORD[((0+160+0))+rbp] |
| mov r15,rdx |
| mulx r14,r13,r10 |
| mulx rdx,rax,r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rdx,QWORD[((8+160+0))+rbp] |
| mulx rax,r10,r10 |
| add r14,r10 |
| mulx r9,r11,r11 |
| adc r15,r11 |
| adc r9,0 |
| imul rdx,r12 |
| add r15,rax |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| lea rdi,[32+rdi] |
| vperm2i128 ymm0,ymm7,ymm3,0x02 |
| vperm2i128 ymm7,ymm7,ymm3,0x13 |
| vperm2i128 ymm3,ymm15,ymm11,0x02 |
| vperm2i128 ymm11,ymm15,ymm11,0x13 |
| vpxor ymm0,ymm0,YMMWORD[((0+0))+rsi] |
| vpxor ymm3,ymm3,YMMWORD[((32+0))+rsi] |
| vpxor ymm7,ymm7,YMMWORD[((64+0))+rsi] |
| vpxor ymm11,ymm11,YMMWORD[((96+0))+rsi] |
| vmovdqu YMMWORD[(0+0)+rdi],ymm0 |
| vmovdqu YMMWORD[(32+0)+rdi],ymm3 |
| vmovdqu YMMWORD[(64+0)+rdi],ymm7 |
| vmovdqu YMMWORD[(96+0)+rdi],ymm11 |
| |
| vmovdqa ymm0,YMMWORD[((160+128))+rbp] |
| vperm2i128 ymm3,ymm6,ymm2,0x02 |
| vperm2i128 ymm6,ymm6,ymm2,0x13 |
| vperm2i128 ymm2,ymm14,ymm10,0x02 |
| vperm2i128 ymm10,ymm14,ymm10,0x13 |
| vpxor ymm3,ymm3,YMMWORD[((0+128))+rsi] |
| vpxor ymm2,ymm2,YMMWORD[((32+128))+rsi] |
| vpxor ymm6,ymm6,YMMWORD[((64+128))+rsi] |
| vpxor ymm10,ymm10,YMMWORD[((96+128))+rsi] |
| vmovdqu YMMWORD[(0+128)+rdi],ymm3 |
| vmovdqu YMMWORD[(32+128)+rdi],ymm2 |
| vmovdqu YMMWORD[(64+128)+rdi],ymm6 |
| vmovdqu YMMWORD[(96+128)+rdi],ymm10 |
| vperm2i128 ymm3,ymm5,ymm1,0x02 |
| vperm2i128 ymm5,ymm5,ymm1,0x13 |
| vperm2i128 ymm1,ymm13,ymm9,0x02 |
| vperm2i128 ymm9,ymm13,ymm9,0x13 |
| vpxor ymm3,ymm3,YMMWORD[((0+256))+rsi] |
| vpxor ymm1,ymm1,YMMWORD[((32+256))+rsi] |
| vpxor ymm5,ymm5,YMMWORD[((64+256))+rsi] |
| vpxor ymm9,ymm9,YMMWORD[((96+256))+rsi] |
| vmovdqu YMMWORD[(0+256)+rdi],ymm3 |
| vmovdqu YMMWORD[(32+256)+rdi],ymm1 |
| vmovdqu YMMWORD[(64+256)+rdi],ymm5 |
| vmovdqu YMMWORD[(96+256)+rdi],ymm9 |
| vperm2i128 ymm3,ymm4,ymm0,0x02 |
| vperm2i128 ymm4,ymm4,ymm0,0x13 |
| vperm2i128 ymm0,ymm12,ymm8,0x02 |
| vperm2i128 ymm8,ymm12,ymm8,0x13 |
| vpxor ymm3,ymm3,YMMWORD[((0+384))+rsi] |
| vpxor ymm0,ymm0,YMMWORD[((32+384))+rsi] |
| vpxor ymm4,ymm4,YMMWORD[((64+384))+rsi] |
| vpxor ymm8,ymm8,YMMWORD[((96+384))+rsi] |
| vmovdqu YMMWORD[(0+384)+rdi],ymm3 |
| vmovdqu YMMWORD[(32+384)+rdi],ymm0 |
| vmovdqu YMMWORD[(64+384)+rdi],ymm4 |
| vmovdqu YMMWORD[(96+384)+rdi],ymm8 |
| |
| lea rsi,[512+rsi] |
| sub rbx,16*32 |
| cmp rbx,16*32 |
| jg NEAR $L$seal_avx2_main_loop |
| |
| add r10,QWORD[((0+0))+rdi] |
| adc r11,QWORD[((8+0))+rdi] |
| adc r12,1 |
| mov rdx,QWORD[((0+160+0))+rbp] |
| mov r15,rdx |
| mulx r14,r13,r10 |
| mulx rdx,rax,r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rdx,QWORD[((8+160+0))+rbp] |
| mulx rax,r10,r10 |
| add r14,r10 |
| mulx r9,r11,r11 |
| adc r15,r11 |
| adc r9,0 |
| imul rdx,r12 |
| add r15,rax |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| add r10,QWORD[((0+16))+rdi] |
| adc r11,QWORD[((8+16))+rdi] |
| adc r12,1 |
| mov rdx,QWORD[((0+160+0))+rbp] |
| mov r15,rdx |
| mulx r14,r13,r10 |
| mulx rdx,rax,r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rdx,QWORD[((8+160+0))+rbp] |
| mulx rax,r10,r10 |
| add r14,r10 |
| mulx r9,r11,r11 |
| adc r15,r11 |
| adc r9,0 |
| imul rdx,r12 |
| add r15,rax |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| lea rdi,[32+rdi] |
| mov rcx,10 |
| xor r8,r8 |
| |
| cmp rbx,12*32 |
| ja NEAR $L$seal_avx2_tail_512 |
| cmp rbx,8*32 |
| ja NEAR $L$seal_avx2_tail_384 |
| cmp rbx,4*32 |
| ja NEAR $L$seal_avx2_tail_256 |
| |
| $L$seal_avx2_tail_128: |
| vmovdqa ymm0,YMMWORD[$L$chacha20_consts] |
| vmovdqa ymm4,YMMWORD[((160+64))+rbp] |
| vmovdqa ymm8,YMMWORD[((160+96))+rbp] |
| vmovdqa ymm12,YMMWORD[$L$avx2_inc] |
| vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] |
| vmovdqa YMMWORD[(160+160)+rbp],ymm12 |
| |
| $L$seal_avx2_tail_128_rounds_and_3xhash: |
| add r10,QWORD[((0+0))+rdi] |
| adc r11,QWORD[((8+0))+rdi] |
| adc r12,1 |
| mov rdx,QWORD[((0+160+0))+rbp] |
| mov r15,rdx |
| mulx r14,r13,r10 |
| mulx rdx,rax,r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rdx,QWORD[((8+160+0))+rbp] |
| mulx rax,r10,r10 |
| add r14,r10 |
| mulx r9,r11,r11 |
| adc r15,r11 |
| adc r9,0 |
| imul rdx,r12 |
| add r15,rax |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| lea rdi,[16+rdi] |
| $L$seal_avx2_tail_128_rounds_and_2xhash: |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm12,ymm12,YMMWORD[$L$rol16] |
| vpaddd ymm8,ymm8,ymm12 |
| vpxor ymm4,ymm4,ymm8 |
| vpsrld ymm3,ymm4,20 |
| vpslld ymm4,ymm4,12 |
| vpxor ymm4,ymm4,ymm3 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm12,ymm12,YMMWORD[$L$rol8] |
| vpaddd ymm8,ymm8,ymm12 |
| vpxor ymm4,ymm4,ymm8 |
| vpslld ymm3,ymm4,7 |
| vpsrld ymm4,ymm4,25 |
| vpxor ymm4,ymm4,ymm3 |
| vpalignr ymm12,ymm12,ymm12,12 |
| vpalignr ymm8,ymm8,ymm8,8 |
| vpalignr ymm4,ymm4,ymm4,4 |
| add r10,QWORD[((0+0))+rdi] |
| adc r11,QWORD[((8+0))+rdi] |
| adc r12,1 |
| mov rdx,QWORD[((0+160+0))+rbp] |
| mov r15,rdx |
| mulx r14,r13,r10 |
| mulx rdx,rax,r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rdx,QWORD[((8+160+0))+rbp] |
| mulx rax,r10,r10 |
| add r14,r10 |
| mulx r9,r11,r11 |
| adc r15,r11 |
| adc r9,0 |
| imul rdx,r12 |
| add r15,rax |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm12,ymm12,YMMWORD[$L$rol16] |
| vpaddd ymm8,ymm8,ymm12 |
| vpxor ymm4,ymm4,ymm8 |
| vpsrld ymm3,ymm4,20 |
| vpslld ymm4,ymm4,12 |
| vpxor ymm4,ymm4,ymm3 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm12,ymm12,YMMWORD[$L$rol8] |
| vpaddd ymm8,ymm8,ymm12 |
| vpxor ymm4,ymm4,ymm8 |
| vpslld ymm3,ymm4,7 |
| vpsrld ymm4,ymm4,25 |
| vpxor ymm4,ymm4,ymm3 |
| vpalignr ymm12,ymm12,ymm12,4 |
| vpalignr ymm8,ymm8,ymm8,8 |
| vpalignr ymm4,ymm4,ymm4,12 |
| add r10,QWORD[((0+16))+rdi] |
| adc r11,QWORD[((8+16))+rdi] |
| adc r12,1 |
| mov rdx,QWORD[((0+160+0))+rbp] |
| mov r15,rdx |
| mulx r14,r13,r10 |
| mulx rdx,rax,r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rdx,QWORD[((8+160+0))+rbp] |
| mulx rax,r10,r10 |
| add r14,r10 |
| mulx r9,r11,r11 |
| adc r15,r11 |
| adc r9,0 |
| imul rdx,r12 |
| add r15,rax |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| lea rdi,[32+rdi] |
| dec rcx |
| jg NEAR $L$seal_avx2_tail_128_rounds_and_3xhash |
| dec r8 |
| jge NEAR $L$seal_avx2_tail_128_rounds_and_2xhash |
| vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] |
| vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] |
| vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] |
| vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] |
| vperm2i128 ymm3,ymm4,ymm0,0x13 |
| vperm2i128 ymm0,ymm4,ymm0,0x02 |
| vperm2i128 ymm4,ymm12,ymm8,0x02 |
| vperm2i128 ymm12,ymm12,ymm8,0x13 |
| vmovdqa ymm8,ymm3 |
| |
| jmp NEAR $L$seal_avx2_short_loop |
| |
| $L$seal_avx2_tail_256: |
| vmovdqa ymm0,YMMWORD[$L$chacha20_consts] |
| vmovdqa ymm4,YMMWORD[((160+64))+rbp] |
| vmovdqa ymm8,YMMWORD[((160+96))+rbp] |
| vmovdqa ymm1,ymm0 |
| vmovdqa ymm5,ymm4 |
| vmovdqa ymm9,ymm8 |
| vmovdqa ymm12,YMMWORD[$L$avx2_inc] |
| vpaddd ymm13,ymm12,YMMWORD[((160+160))+rbp] |
| vpaddd ymm12,ymm12,ymm13 |
| vmovdqa YMMWORD[(160+160)+rbp],ymm12 |
| vmovdqa YMMWORD[(160+192)+rbp],ymm13 |
| |
| $L$seal_avx2_tail_256_rounds_and_3xhash: |
| add r10,QWORD[((0+0))+rdi] |
| adc r11,QWORD[((8+0))+rdi] |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| lea rdi,[16+rdi] |
| $L$seal_avx2_tail_256_rounds_and_2xhash: |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm12,ymm12,YMMWORD[$L$rol16] |
| vpaddd ymm8,ymm8,ymm12 |
| vpxor ymm4,ymm4,ymm8 |
| vpsrld ymm3,ymm4,20 |
| vpslld ymm4,ymm4,12 |
| vpxor ymm4,ymm4,ymm3 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm12,ymm12,YMMWORD[$L$rol8] |
| vpaddd ymm8,ymm8,ymm12 |
| vpxor ymm4,ymm4,ymm8 |
| vpslld ymm3,ymm4,7 |
| vpsrld ymm4,ymm4,25 |
| vpxor ymm4,ymm4,ymm3 |
| vpalignr ymm12,ymm12,ymm12,12 |
| vpalignr ymm8,ymm8,ymm8,8 |
| vpalignr ymm4,ymm4,ymm4,4 |
| vpaddd ymm1,ymm1,ymm5 |
| vpxor ymm13,ymm13,ymm1 |
| vpshufb ymm13,ymm13,YMMWORD[$L$rol16] |
| vpaddd ymm9,ymm9,ymm13 |
| vpxor ymm5,ymm5,ymm9 |
| vpsrld ymm3,ymm5,20 |
| vpslld ymm5,ymm5,12 |
| vpxor ymm5,ymm5,ymm3 |
| vpaddd ymm1,ymm1,ymm5 |
| vpxor ymm13,ymm13,ymm1 |
| vpshufb ymm13,ymm13,YMMWORD[$L$rol8] |
| vpaddd ymm9,ymm9,ymm13 |
| vpxor ymm5,ymm5,ymm9 |
| vpslld ymm3,ymm5,7 |
| vpsrld ymm5,ymm5,25 |
| vpxor ymm5,ymm5,ymm3 |
| vpalignr ymm13,ymm13,ymm13,12 |
| vpalignr ymm9,ymm9,ymm9,8 |
| vpalignr ymm5,ymm5,ymm5,4 |
| add r10,QWORD[((0+0))+rdi] |
| adc r11,QWORD[((8+0))+rdi] |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm12,ymm12,YMMWORD[$L$rol16] |
| vpaddd ymm8,ymm8,ymm12 |
| vpxor ymm4,ymm4,ymm8 |
| vpsrld ymm3,ymm4,20 |
| vpslld ymm4,ymm4,12 |
| vpxor ymm4,ymm4,ymm3 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm12,ymm12,YMMWORD[$L$rol8] |
| vpaddd ymm8,ymm8,ymm12 |
| vpxor ymm4,ymm4,ymm8 |
| vpslld ymm3,ymm4,7 |
| vpsrld ymm4,ymm4,25 |
| vpxor ymm4,ymm4,ymm3 |
| vpalignr ymm12,ymm12,ymm12,4 |
| vpalignr ymm8,ymm8,ymm8,8 |
| vpalignr ymm4,ymm4,ymm4,12 |
| vpaddd ymm1,ymm1,ymm5 |
| vpxor ymm13,ymm13,ymm1 |
| vpshufb ymm13,ymm13,YMMWORD[$L$rol16] |
| vpaddd ymm9,ymm9,ymm13 |
| vpxor ymm5,ymm5,ymm9 |
| vpsrld ymm3,ymm5,20 |
| vpslld ymm5,ymm5,12 |
| vpxor ymm5,ymm5,ymm3 |
| vpaddd ymm1,ymm1,ymm5 |
| vpxor ymm13,ymm13,ymm1 |
| vpshufb ymm13,ymm13,YMMWORD[$L$rol8] |
| vpaddd ymm9,ymm9,ymm13 |
| vpxor ymm5,ymm5,ymm9 |
| vpslld ymm3,ymm5,7 |
| vpsrld ymm5,ymm5,25 |
| vpxor ymm5,ymm5,ymm3 |
| vpalignr ymm13,ymm13,ymm13,4 |
| vpalignr ymm9,ymm9,ymm9,8 |
| vpalignr ymm5,ymm5,ymm5,12 |
| add r10,QWORD[((0+16))+rdi] |
| adc r11,QWORD[((8+16))+rdi] |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| lea rdi,[32+rdi] |
| dec rcx |
| jg NEAR $L$seal_avx2_tail_256_rounds_and_3xhash |
| dec r8 |
| jge NEAR $L$seal_avx2_tail_256_rounds_and_2xhash |
| vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] |
| vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] |
| vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] |
| vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] |
| vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] |
| vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] |
| vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] |
| vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] |
| vperm2i128 ymm3,ymm5,ymm1,0x02 |
| vperm2i128 ymm5,ymm5,ymm1,0x13 |
| vperm2i128 ymm1,ymm13,ymm9,0x02 |
| vperm2i128 ymm9,ymm13,ymm9,0x13 |
| vpxor ymm3,ymm3,YMMWORD[((0+0))+rsi] |
| vpxor ymm1,ymm1,YMMWORD[((32+0))+rsi] |
| vpxor ymm5,ymm5,YMMWORD[((64+0))+rsi] |
| vpxor ymm9,ymm9,YMMWORD[((96+0))+rsi] |
| vmovdqu YMMWORD[(0+0)+rdi],ymm3 |
| vmovdqu YMMWORD[(32+0)+rdi],ymm1 |
| vmovdqu YMMWORD[(64+0)+rdi],ymm5 |
| vmovdqu YMMWORD[(96+0)+rdi],ymm9 |
| vperm2i128 ymm3,ymm4,ymm0,0x13 |
| vperm2i128 ymm0,ymm4,ymm0,0x02 |
| vperm2i128 ymm4,ymm12,ymm8,0x02 |
| vperm2i128 ymm12,ymm12,ymm8,0x13 |
| vmovdqa ymm8,ymm3 |
| |
| mov rcx,4*32 |
| lea rsi,[128+rsi] |
| sub rbx,4*32 |
| jmp NEAR $L$seal_avx2_short_hash_remainder |
| |
| $L$seal_avx2_tail_384: |
| vmovdqa ymm0,YMMWORD[$L$chacha20_consts] |
| vmovdqa ymm4,YMMWORD[((160+64))+rbp] |
| vmovdqa ymm8,YMMWORD[((160+96))+rbp] |
| vmovdqa ymm1,ymm0 |
| vmovdqa ymm5,ymm4 |
| vmovdqa ymm9,ymm8 |
| vmovdqa ymm2,ymm0 |
| vmovdqa ymm6,ymm4 |
| vmovdqa ymm10,ymm8 |
| vmovdqa ymm12,YMMWORD[$L$avx2_inc] |
| vpaddd ymm14,ymm12,YMMWORD[((160+160))+rbp] |
| vpaddd ymm13,ymm12,ymm14 |
| vpaddd ymm12,ymm12,ymm13 |
| vmovdqa YMMWORD[(160+160)+rbp],ymm12 |
| vmovdqa YMMWORD[(160+192)+rbp],ymm13 |
| vmovdqa YMMWORD[(160+224)+rbp],ymm14 |
| |
| $L$seal_avx2_tail_384_rounds_and_3xhash: |
| add r10,QWORD[((0+0))+rdi] |
| adc r11,QWORD[((8+0))+rdi] |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| lea rdi,[16+rdi] |
| $L$seal_avx2_tail_384_rounds_and_2xhash: |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm12,ymm12,YMMWORD[$L$rol16] |
| vpaddd ymm8,ymm8,ymm12 |
| vpxor ymm4,ymm4,ymm8 |
| vpsrld ymm3,ymm4,20 |
| vpslld ymm4,ymm4,12 |
| vpxor ymm4,ymm4,ymm3 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm12,ymm12,YMMWORD[$L$rol8] |
| vpaddd ymm8,ymm8,ymm12 |
| vpxor ymm4,ymm4,ymm8 |
| vpslld ymm3,ymm4,7 |
| vpsrld ymm4,ymm4,25 |
| vpxor ymm4,ymm4,ymm3 |
| vpalignr ymm12,ymm12,ymm12,12 |
| vpalignr ymm8,ymm8,ymm8,8 |
| vpalignr ymm4,ymm4,ymm4,4 |
| vpaddd ymm1,ymm1,ymm5 |
| vpxor ymm13,ymm13,ymm1 |
| vpshufb ymm13,ymm13,YMMWORD[$L$rol16] |
| vpaddd ymm9,ymm9,ymm13 |
| vpxor ymm5,ymm5,ymm9 |
| vpsrld ymm3,ymm5,20 |
| vpslld ymm5,ymm5,12 |
| vpxor ymm5,ymm5,ymm3 |
| vpaddd ymm1,ymm1,ymm5 |
| vpxor ymm13,ymm13,ymm1 |
| vpshufb ymm13,ymm13,YMMWORD[$L$rol8] |
| vpaddd ymm9,ymm9,ymm13 |
| vpxor ymm5,ymm5,ymm9 |
| vpslld ymm3,ymm5,7 |
| vpsrld ymm5,ymm5,25 |
| vpxor ymm5,ymm5,ymm3 |
| vpalignr ymm13,ymm13,ymm13,12 |
| vpalignr ymm9,ymm9,ymm9,8 |
| vpalignr ymm5,ymm5,ymm5,4 |
| add r10,QWORD[((0+0))+rdi] |
| adc r11,QWORD[((8+0))+rdi] |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| vpaddd ymm2,ymm2,ymm6 |
| vpxor ymm14,ymm14,ymm2 |
| vpshufb ymm14,ymm14,YMMWORD[$L$rol16] |
| vpaddd ymm10,ymm10,ymm14 |
| vpxor ymm6,ymm6,ymm10 |
| vpsrld ymm3,ymm6,20 |
| vpslld ymm6,ymm6,12 |
| vpxor ymm6,ymm6,ymm3 |
| vpaddd ymm2,ymm2,ymm6 |
| vpxor ymm14,ymm14,ymm2 |
| vpshufb ymm14,ymm14,YMMWORD[$L$rol8] |
| vpaddd ymm10,ymm10,ymm14 |
| vpxor ymm6,ymm6,ymm10 |
| vpslld ymm3,ymm6,7 |
| vpsrld ymm6,ymm6,25 |
| vpxor ymm6,ymm6,ymm3 |
| vpalignr ymm14,ymm14,ymm14,12 |
| vpalignr ymm10,ymm10,ymm10,8 |
| vpalignr ymm6,ymm6,ymm6,4 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm12,ymm12,YMMWORD[$L$rol16] |
| vpaddd ymm8,ymm8,ymm12 |
| vpxor ymm4,ymm4,ymm8 |
| vpsrld ymm3,ymm4,20 |
| vpslld ymm4,ymm4,12 |
| vpxor ymm4,ymm4,ymm3 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm12,ymm12,YMMWORD[$L$rol8] |
| vpaddd ymm8,ymm8,ymm12 |
| vpxor ymm4,ymm4,ymm8 |
| vpslld ymm3,ymm4,7 |
| vpsrld ymm4,ymm4,25 |
| vpxor ymm4,ymm4,ymm3 |
| vpalignr ymm12,ymm12,ymm12,4 |
| vpalignr ymm8,ymm8,ymm8,8 |
| vpalignr ymm4,ymm4,ymm4,12 |
| add r10,QWORD[((0+16))+rdi] |
| adc r11,QWORD[((8+16))+rdi] |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| vpaddd ymm1,ymm1,ymm5 |
| vpxor ymm13,ymm13,ymm1 |
| vpshufb ymm13,ymm13,YMMWORD[$L$rol16] |
| vpaddd ymm9,ymm9,ymm13 |
| vpxor ymm5,ymm5,ymm9 |
| vpsrld ymm3,ymm5,20 |
| vpslld ymm5,ymm5,12 |
| vpxor ymm5,ymm5,ymm3 |
| vpaddd ymm1,ymm1,ymm5 |
| vpxor ymm13,ymm13,ymm1 |
| vpshufb ymm13,ymm13,YMMWORD[$L$rol8] |
| vpaddd ymm9,ymm9,ymm13 |
| vpxor ymm5,ymm5,ymm9 |
| vpslld ymm3,ymm5,7 |
| vpsrld ymm5,ymm5,25 |
| vpxor ymm5,ymm5,ymm3 |
| vpalignr ymm13,ymm13,ymm13,4 |
| vpalignr ymm9,ymm9,ymm9,8 |
| vpalignr ymm5,ymm5,ymm5,12 |
| vpaddd ymm2,ymm2,ymm6 |
| vpxor ymm14,ymm14,ymm2 |
| vpshufb ymm14,ymm14,YMMWORD[$L$rol16] |
| vpaddd ymm10,ymm10,ymm14 |
| vpxor ymm6,ymm6,ymm10 |
| vpsrld ymm3,ymm6,20 |
| vpslld ymm6,ymm6,12 |
| vpxor ymm6,ymm6,ymm3 |
| vpaddd ymm2,ymm2,ymm6 |
| vpxor ymm14,ymm14,ymm2 |
| vpshufb ymm14,ymm14,YMMWORD[$L$rol8] |
| vpaddd ymm10,ymm10,ymm14 |
| vpxor ymm6,ymm6,ymm10 |
| vpslld ymm3,ymm6,7 |
| vpsrld ymm6,ymm6,25 |
| vpxor ymm6,ymm6,ymm3 |
| vpalignr ymm14,ymm14,ymm14,4 |
| vpalignr ymm10,ymm10,ymm10,8 |
| vpalignr ymm6,ymm6,ymm6,12 |
| |
| lea rdi,[32+rdi] |
| dec rcx |
| jg NEAR $L$seal_avx2_tail_384_rounds_and_3xhash |
| dec r8 |
| jge NEAR $L$seal_avx2_tail_384_rounds_and_2xhash |
| vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] |
| vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp] |
| vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp] |
| vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] |
| vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] |
| vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] |
| vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] |
| vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] |
| vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] |
| vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] |
| vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] |
| vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] |
| vperm2i128 ymm3,ymm6,ymm2,0x02 |
| vperm2i128 ymm6,ymm6,ymm2,0x13 |
| vperm2i128 ymm2,ymm14,ymm10,0x02 |
| vperm2i128 ymm10,ymm14,ymm10,0x13 |
| vpxor ymm3,ymm3,YMMWORD[((0+0))+rsi] |
| vpxor ymm2,ymm2,YMMWORD[((32+0))+rsi] |
| vpxor ymm6,ymm6,YMMWORD[((64+0))+rsi] |
| vpxor ymm10,ymm10,YMMWORD[((96+0))+rsi] |
| vmovdqu YMMWORD[(0+0)+rdi],ymm3 |
| vmovdqu YMMWORD[(32+0)+rdi],ymm2 |
| vmovdqu YMMWORD[(64+0)+rdi],ymm6 |
| vmovdqu YMMWORD[(96+0)+rdi],ymm10 |
| vperm2i128 ymm3,ymm5,ymm1,0x02 |
| vperm2i128 ymm5,ymm5,ymm1,0x13 |
| vperm2i128 ymm1,ymm13,ymm9,0x02 |
| vperm2i128 ymm9,ymm13,ymm9,0x13 |
| vpxor ymm3,ymm3,YMMWORD[((0+128))+rsi] |
| vpxor ymm1,ymm1,YMMWORD[((32+128))+rsi] |
| vpxor ymm5,ymm5,YMMWORD[((64+128))+rsi] |
| vpxor ymm9,ymm9,YMMWORD[((96+128))+rsi] |
| vmovdqu YMMWORD[(0+128)+rdi],ymm3 |
| vmovdqu YMMWORD[(32+128)+rdi],ymm1 |
| vmovdqu YMMWORD[(64+128)+rdi],ymm5 |
| vmovdqu YMMWORD[(96+128)+rdi],ymm9 |
| vperm2i128 ymm3,ymm4,ymm0,0x13 |
| vperm2i128 ymm0,ymm4,ymm0,0x02 |
| vperm2i128 ymm4,ymm12,ymm8,0x02 |
| vperm2i128 ymm12,ymm12,ymm8,0x13 |
| vmovdqa ymm8,ymm3 |
| |
| mov rcx,8*32 |
| lea rsi,[256+rsi] |
| sub rbx,8*32 |
| jmp NEAR $L$seal_avx2_short_hash_remainder |
| |
| $L$seal_avx2_tail_512: |
| vmovdqa ymm0,YMMWORD[$L$chacha20_consts] |
| vmovdqa ymm4,YMMWORD[((160+64))+rbp] |
| vmovdqa ymm8,YMMWORD[((160+96))+rbp] |
| vmovdqa ymm1,ymm0 |
| vmovdqa ymm5,ymm4 |
| vmovdqa ymm9,ymm8 |
| vmovdqa ymm2,ymm0 |
| vmovdqa ymm6,ymm4 |
| vmovdqa ymm10,ymm8 |
| vmovdqa ymm3,ymm0 |
| vmovdqa ymm7,ymm4 |
| vmovdqa ymm11,ymm8 |
| vmovdqa ymm12,YMMWORD[$L$avx2_inc] |
| vpaddd ymm15,ymm12,YMMWORD[((160+160))+rbp] |
| vpaddd ymm14,ymm12,ymm15 |
| vpaddd ymm13,ymm12,ymm14 |
| vpaddd ymm12,ymm12,ymm13 |
| vmovdqa YMMWORD[(160+256)+rbp],ymm15 |
| vmovdqa YMMWORD[(160+224)+rbp],ymm14 |
| vmovdqa YMMWORD[(160+192)+rbp],ymm13 |
| vmovdqa YMMWORD[(160+160)+rbp],ymm12 |
| |
| $L$seal_avx2_tail_512_rounds_and_3xhash: |
| add r10,QWORD[((0+0))+rdi] |
| adc r11,QWORD[((8+0))+rdi] |
| adc r12,1 |
| mov rdx,QWORD[((0+160+0))+rbp] |
| mov r15,rdx |
| mulx r14,r13,r10 |
| mulx rdx,rax,r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rdx,QWORD[((8+160+0))+rbp] |
| mulx rax,r10,r10 |
| add r14,r10 |
| mulx r9,r11,r11 |
| adc r15,r11 |
| adc r9,0 |
| imul rdx,r12 |
| add r15,rax |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| lea rdi,[16+rdi] |
| $L$seal_avx2_tail_512_rounds_and_2xhash: |
| vmovdqa YMMWORD[(160+128)+rbp],ymm8 |
| vmovdqa ymm8,YMMWORD[$L$rol16] |
| vpaddd ymm3,ymm3,ymm7 |
| vpaddd ymm2,ymm2,ymm6 |
| vpaddd ymm1,ymm1,ymm5 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm15,ymm15,ymm3 |
| vpxor ymm14,ymm14,ymm2 |
| vpxor ymm13,ymm13,ymm1 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm15,ymm15,ymm8 |
| vpshufb ymm14,ymm14,ymm8 |
| vpshufb ymm13,ymm13,ymm8 |
| vpshufb ymm12,ymm12,ymm8 |
| vpaddd ymm11,ymm11,ymm15 |
| vpaddd ymm10,ymm10,ymm14 |
| vpaddd ymm9,ymm9,ymm13 |
| vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] |
| vpxor ymm7,ymm7,ymm11 |
| vpxor ymm6,ymm6,ymm10 |
| add r10,QWORD[((0+0))+rdi] |
| adc r11,QWORD[((8+0))+rdi] |
| adc r12,1 |
| vpxor ymm5,ymm5,ymm9 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa YMMWORD[(160+128)+rbp],ymm8 |
| vpsrld ymm8,ymm7,20 |
| vpslld ymm7,ymm7,32-20 |
| vpxor ymm7,ymm7,ymm8 |
| vpsrld ymm8,ymm6,20 |
| vpslld ymm6,ymm6,32-20 |
| vpxor ymm6,ymm6,ymm8 |
| vpsrld ymm8,ymm5,20 |
| vpslld ymm5,ymm5,32-20 |
| vpxor ymm5,ymm5,ymm8 |
| vpsrld ymm8,ymm4,20 |
| vpslld ymm4,ymm4,32-20 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa ymm8,YMMWORD[$L$rol8] |
| vpaddd ymm3,ymm3,ymm7 |
| vpaddd ymm2,ymm2,ymm6 |
| vpaddd ymm1,ymm1,ymm5 |
| vpaddd ymm0,ymm0,ymm4 |
| mov rdx,QWORD[((0+160+0))+rbp] |
| mov r15,rdx |
| mulx r14,r13,r10 |
| mulx rdx,rax,r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| vpxor ymm15,ymm15,ymm3 |
| vpxor ymm14,ymm14,ymm2 |
| vpxor ymm13,ymm13,ymm1 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm15,ymm15,ymm8 |
| vpshufb ymm14,ymm14,ymm8 |
| vpshufb ymm13,ymm13,ymm8 |
| vpshufb ymm12,ymm12,ymm8 |
| vpaddd ymm11,ymm11,ymm15 |
| vpaddd ymm10,ymm10,ymm14 |
| vpaddd ymm9,ymm9,ymm13 |
| vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] |
| vpxor ymm7,ymm7,ymm11 |
| vpxor ymm6,ymm6,ymm10 |
| vpxor ymm5,ymm5,ymm9 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa YMMWORD[(160+128)+rbp],ymm8 |
| vpsrld ymm8,ymm7,25 |
| vpslld ymm7,ymm7,32-25 |
| vpxor ymm7,ymm7,ymm8 |
| mov rdx,QWORD[((8+160+0))+rbp] |
| mulx rax,r10,r10 |
| add r14,r10 |
| mulx r9,r11,r11 |
| adc r15,r11 |
| adc r9,0 |
| imul rdx,r12 |
| vpsrld ymm8,ymm6,25 |
| vpslld ymm6,ymm6,32-25 |
| vpxor ymm6,ymm6,ymm8 |
| vpsrld ymm8,ymm5,25 |
| vpslld ymm5,ymm5,32-25 |
| vpxor ymm5,ymm5,ymm8 |
| vpsrld ymm8,ymm4,25 |
| vpslld ymm4,ymm4,32-25 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa ymm8,YMMWORD[((160+128))+rbp] |
| vpalignr ymm7,ymm7,ymm7,4 |
| vpalignr ymm11,ymm11,ymm11,8 |
| vpalignr ymm15,ymm15,ymm15,12 |
| vpalignr ymm6,ymm6,ymm6,4 |
| vpalignr ymm10,ymm10,ymm10,8 |
| vpalignr ymm14,ymm14,ymm14,12 |
| vpalignr ymm5,ymm5,ymm5,4 |
| vpalignr ymm9,ymm9,ymm9,8 |
| vpalignr ymm13,ymm13,ymm13,12 |
| vpalignr ymm4,ymm4,ymm4,4 |
| add r15,rax |
| adc r9,rdx |
| vpalignr ymm8,ymm8,ymm8,8 |
| vpalignr ymm12,ymm12,ymm12,12 |
| vmovdqa YMMWORD[(160+128)+rbp],ymm8 |
| vmovdqa ymm8,YMMWORD[$L$rol16] |
| vpaddd ymm3,ymm3,ymm7 |
| vpaddd ymm2,ymm2,ymm6 |
| vpaddd ymm1,ymm1,ymm5 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm15,ymm15,ymm3 |
| vpxor ymm14,ymm14,ymm2 |
| vpxor ymm13,ymm13,ymm1 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm15,ymm15,ymm8 |
| vpshufb ymm14,ymm14,ymm8 |
| vpshufb ymm13,ymm13,ymm8 |
| vpshufb ymm12,ymm12,ymm8 |
| vpaddd ymm11,ymm11,ymm15 |
| vpaddd ymm10,ymm10,ymm14 |
| vpaddd ymm9,ymm9,ymm13 |
| vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| vpxor ymm7,ymm7,ymm11 |
| vpxor ymm6,ymm6,ymm10 |
| vpxor ymm5,ymm5,ymm9 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa YMMWORD[(160+128)+rbp],ymm8 |
| vpsrld ymm8,ymm7,20 |
| vpslld ymm7,ymm7,32-20 |
| vpxor ymm7,ymm7,ymm8 |
| vpsrld ymm8,ymm6,20 |
| vpslld ymm6,ymm6,32-20 |
| vpxor ymm6,ymm6,ymm8 |
| vpsrld ymm8,ymm5,20 |
| vpslld ymm5,ymm5,32-20 |
| vpxor ymm5,ymm5,ymm8 |
| vpsrld ymm8,ymm4,20 |
| vpslld ymm4,ymm4,32-20 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa ymm8,YMMWORD[$L$rol8] |
| vpaddd ymm3,ymm3,ymm7 |
| vpaddd ymm2,ymm2,ymm6 |
| add r10,QWORD[((0+16))+rdi] |
| adc r11,QWORD[((8+16))+rdi] |
| adc r12,1 |
| vpaddd ymm1,ymm1,ymm5 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm15,ymm15,ymm3 |
| vpxor ymm14,ymm14,ymm2 |
| vpxor ymm13,ymm13,ymm1 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm15,ymm15,ymm8 |
| vpshufb ymm14,ymm14,ymm8 |
| vpshufb ymm13,ymm13,ymm8 |
| vpshufb ymm12,ymm12,ymm8 |
| vpaddd ymm11,ymm11,ymm15 |
| vpaddd ymm10,ymm10,ymm14 |
| vpaddd ymm9,ymm9,ymm13 |
| vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] |
| vpxor ymm7,ymm7,ymm11 |
| vpxor ymm6,ymm6,ymm10 |
| vpxor ymm5,ymm5,ymm9 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa YMMWORD[(160+128)+rbp],ymm8 |
| vpsrld ymm8,ymm7,25 |
| mov rdx,QWORD[((0+160+0))+rbp] |
| mov r15,rdx |
| mulx r14,r13,r10 |
| mulx rdx,rax,r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| vpslld ymm7,ymm7,32-25 |
| vpxor ymm7,ymm7,ymm8 |
| vpsrld ymm8,ymm6,25 |
| vpslld ymm6,ymm6,32-25 |
| vpxor ymm6,ymm6,ymm8 |
| vpsrld ymm8,ymm5,25 |
| vpslld ymm5,ymm5,32-25 |
| vpxor ymm5,ymm5,ymm8 |
| vpsrld ymm8,ymm4,25 |
| vpslld ymm4,ymm4,32-25 |
| vpxor ymm4,ymm4,ymm8 |
| vmovdqa ymm8,YMMWORD[((160+128))+rbp] |
| vpalignr ymm7,ymm7,ymm7,12 |
| vpalignr ymm11,ymm11,ymm11,8 |
| vpalignr ymm15,ymm15,ymm15,4 |
| vpalignr ymm6,ymm6,ymm6,12 |
| vpalignr ymm10,ymm10,ymm10,8 |
| vpalignr ymm14,ymm14,ymm14,4 |
| vpalignr ymm5,ymm5,ymm5,12 |
| vpalignr ymm9,ymm9,ymm9,8 |
| mov rdx,QWORD[((8+160+0))+rbp] |
| mulx rax,r10,r10 |
| add r14,r10 |
| mulx r9,r11,r11 |
| adc r15,r11 |
| adc r9,0 |
| imul rdx,r12 |
| vpalignr ymm13,ymm13,ymm13,4 |
| vpalignr ymm4,ymm4,ymm4,12 |
| vpalignr ymm8,ymm8,ymm8,8 |
| vpalignr ymm12,ymm12,ymm12,4 |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| add r15,rax |
| adc r9,rdx |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| lea rdi,[32+rdi] |
| dec rcx |
| jg NEAR $L$seal_avx2_tail_512_rounds_and_3xhash |
| dec r8 |
| jge NEAR $L$seal_avx2_tail_512_rounds_and_2xhash |
| vpaddd ymm3,ymm3,YMMWORD[$L$chacha20_consts] |
| vpaddd ymm7,ymm7,YMMWORD[((160+64))+rbp] |
| vpaddd ymm11,ymm11,YMMWORD[((160+96))+rbp] |
| vpaddd ymm15,ymm15,YMMWORD[((160+256))+rbp] |
| vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] |
| vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp] |
| vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp] |
| vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] |
| vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] |
| vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] |
| vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] |
| vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] |
| vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] |
| vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] |
| vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] |
| vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] |
| |
| vmovdqa YMMWORD[(160+128)+rbp],ymm0 |
| vperm2i128 ymm0,ymm7,ymm3,0x02 |
| vperm2i128 ymm7,ymm7,ymm3,0x13 |
| vperm2i128 ymm3,ymm15,ymm11,0x02 |
| vperm2i128 ymm11,ymm15,ymm11,0x13 |
| vpxor ymm0,ymm0,YMMWORD[((0+0))+rsi] |
| vpxor ymm3,ymm3,YMMWORD[((32+0))+rsi] |
| vpxor ymm7,ymm7,YMMWORD[((64+0))+rsi] |
| vpxor ymm11,ymm11,YMMWORD[((96+0))+rsi] |
| vmovdqu YMMWORD[(0+0)+rdi],ymm0 |
| vmovdqu YMMWORD[(32+0)+rdi],ymm3 |
| vmovdqu YMMWORD[(64+0)+rdi],ymm7 |
| vmovdqu YMMWORD[(96+0)+rdi],ymm11 |
| |
| vmovdqa ymm0,YMMWORD[((160+128))+rbp] |
| vperm2i128 ymm3,ymm6,ymm2,0x02 |
| vperm2i128 ymm6,ymm6,ymm2,0x13 |
| vperm2i128 ymm2,ymm14,ymm10,0x02 |
| vperm2i128 ymm10,ymm14,ymm10,0x13 |
| vpxor ymm3,ymm3,YMMWORD[((0+128))+rsi] |
| vpxor ymm2,ymm2,YMMWORD[((32+128))+rsi] |
| vpxor ymm6,ymm6,YMMWORD[((64+128))+rsi] |
| vpxor ymm10,ymm10,YMMWORD[((96+128))+rsi] |
| vmovdqu YMMWORD[(0+128)+rdi],ymm3 |
| vmovdqu YMMWORD[(32+128)+rdi],ymm2 |
| vmovdqu YMMWORD[(64+128)+rdi],ymm6 |
| vmovdqu YMMWORD[(96+128)+rdi],ymm10 |
| vperm2i128 ymm3,ymm5,ymm1,0x02 |
| vperm2i128 ymm5,ymm5,ymm1,0x13 |
| vperm2i128 ymm1,ymm13,ymm9,0x02 |
| vperm2i128 ymm9,ymm13,ymm9,0x13 |
| vpxor ymm3,ymm3,YMMWORD[((0+256))+rsi] |
| vpxor ymm1,ymm1,YMMWORD[((32+256))+rsi] |
| vpxor ymm5,ymm5,YMMWORD[((64+256))+rsi] |
| vpxor ymm9,ymm9,YMMWORD[((96+256))+rsi] |
| vmovdqu YMMWORD[(0+256)+rdi],ymm3 |
| vmovdqu YMMWORD[(32+256)+rdi],ymm1 |
| vmovdqu YMMWORD[(64+256)+rdi],ymm5 |
| vmovdqu YMMWORD[(96+256)+rdi],ymm9 |
| vperm2i128 ymm3,ymm4,ymm0,0x13 |
| vperm2i128 ymm0,ymm4,ymm0,0x02 |
| vperm2i128 ymm4,ymm12,ymm8,0x02 |
| vperm2i128 ymm12,ymm12,ymm8,0x13 |
| vmovdqa ymm8,ymm3 |
| |
| mov rcx,12*32 |
| lea rsi,[384+rsi] |
| sub rbx,12*32 |
| jmp NEAR $L$seal_avx2_short_hash_remainder |
| |
| $L$seal_avx2_320: |
| vmovdqa ymm1,ymm0 |
| vmovdqa ymm2,ymm0 |
| vmovdqa ymm5,ymm4 |
| vmovdqa ymm6,ymm4 |
| vmovdqa ymm9,ymm8 |
| vmovdqa ymm10,ymm8 |
| vpaddd ymm13,ymm12,YMMWORD[$L$avx2_inc] |
| vpaddd ymm14,ymm13,YMMWORD[$L$avx2_inc] |
| vmovdqa ymm7,ymm4 |
| vmovdqa ymm11,ymm8 |
| vmovdqa YMMWORD[(160+160)+rbp],ymm12 |
| vmovdqa YMMWORD[(160+192)+rbp],ymm13 |
| vmovdqa YMMWORD[(160+224)+rbp],ymm14 |
| mov r10,10 |
| $L$seal_avx2_320_rounds: |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm12,ymm12,YMMWORD[$L$rol16] |
| vpaddd ymm8,ymm8,ymm12 |
| vpxor ymm4,ymm4,ymm8 |
| vpsrld ymm3,ymm4,20 |
| vpslld ymm4,ymm4,12 |
| vpxor ymm4,ymm4,ymm3 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm12,ymm12,YMMWORD[$L$rol8] |
| vpaddd ymm8,ymm8,ymm12 |
| vpxor ymm4,ymm4,ymm8 |
| vpslld ymm3,ymm4,7 |
| vpsrld ymm4,ymm4,25 |
| vpxor ymm4,ymm4,ymm3 |
| vpalignr ymm12,ymm12,ymm12,12 |
| vpalignr ymm8,ymm8,ymm8,8 |
| vpalignr ymm4,ymm4,ymm4,4 |
| vpaddd ymm1,ymm1,ymm5 |
| vpxor ymm13,ymm13,ymm1 |
| vpshufb ymm13,ymm13,YMMWORD[$L$rol16] |
| vpaddd ymm9,ymm9,ymm13 |
| vpxor ymm5,ymm5,ymm9 |
| vpsrld ymm3,ymm5,20 |
| vpslld ymm5,ymm5,12 |
| vpxor ymm5,ymm5,ymm3 |
| vpaddd ymm1,ymm1,ymm5 |
| vpxor ymm13,ymm13,ymm1 |
| vpshufb ymm13,ymm13,YMMWORD[$L$rol8] |
| vpaddd ymm9,ymm9,ymm13 |
| vpxor ymm5,ymm5,ymm9 |
| vpslld ymm3,ymm5,7 |
| vpsrld ymm5,ymm5,25 |
| vpxor ymm5,ymm5,ymm3 |
| vpalignr ymm13,ymm13,ymm13,12 |
| vpalignr ymm9,ymm9,ymm9,8 |
| vpalignr ymm5,ymm5,ymm5,4 |
| vpaddd ymm2,ymm2,ymm6 |
| vpxor ymm14,ymm14,ymm2 |
| vpshufb ymm14,ymm14,YMMWORD[$L$rol16] |
| vpaddd ymm10,ymm10,ymm14 |
| vpxor ymm6,ymm6,ymm10 |
| vpsrld ymm3,ymm6,20 |
| vpslld ymm6,ymm6,12 |
| vpxor ymm6,ymm6,ymm3 |
| vpaddd ymm2,ymm2,ymm6 |
| vpxor ymm14,ymm14,ymm2 |
| vpshufb ymm14,ymm14,YMMWORD[$L$rol8] |
| vpaddd ymm10,ymm10,ymm14 |
| vpxor ymm6,ymm6,ymm10 |
| vpslld ymm3,ymm6,7 |
| vpsrld ymm6,ymm6,25 |
| vpxor ymm6,ymm6,ymm3 |
| vpalignr ymm14,ymm14,ymm14,12 |
| vpalignr ymm10,ymm10,ymm10,8 |
| vpalignr ymm6,ymm6,ymm6,4 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm12,ymm12,YMMWORD[$L$rol16] |
| vpaddd ymm8,ymm8,ymm12 |
| vpxor ymm4,ymm4,ymm8 |
| vpsrld ymm3,ymm4,20 |
| vpslld ymm4,ymm4,12 |
| vpxor ymm4,ymm4,ymm3 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm12,ymm12,YMMWORD[$L$rol8] |
| vpaddd ymm8,ymm8,ymm12 |
| vpxor ymm4,ymm4,ymm8 |
| vpslld ymm3,ymm4,7 |
| vpsrld ymm4,ymm4,25 |
| vpxor ymm4,ymm4,ymm3 |
| vpalignr ymm12,ymm12,ymm12,4 |
| vpalignr ymm8,ymm8,ymm8,8 |
| vpalignr ymm4,ymm4,ymm4,12 |
| vpaddd ymm1,ymm1,ymm5 |
| vpxor ymm13,ymm13,ymm1 |
| vpshufb ymm13,ymm13,YMMWORD[$L$rol16] |
| vpaddd ymm9,ymm9,ymm13 |
| vpxor ymm5,ymm5,ymm9 |
| vpsrld ymm3,ymm5,20 |
| vpslld ymm5,ymm5,12 |
| vpxor ymm5,ymm5,ymm3 |
| vpaddd ymm1,ymm1,ymm5 |
| vpxor ymm13,ymm13,ymm1 |
| vpshufb ymm13,ymm13,YMMWORD[$L$rol8] |
| vpaddd ymm9,ymm9,ymm13 |
| vpxor ymm5,ymm5,ymm9 |
| vpslld ymm3,ymm5,7 |
| vpsrld ymm5,ymm5,25 |
| vpxor ymm5,ymm5,ymm3 |
| vpalignr ymm13,ymm13,ymm13,4 |
| vpalignr ymm9,ymm9,ymm9,8 |
| vpalignr ymm5,ymm5,ymm5,12 |
| vpaddd ymm2,ymm2,ymm6 |
| vpxor ymm14,ymm14,ymm2 |
| vpshufb ymm14,ymm14,YMMWORD[$L$rol16] |
| vpaddd ymm10,ymm10,ymm14 |
| vpxor ymm6,ymm6,ymm10 |
| vpsrld ymm3,ymm6,20 |
| vpslld ymm6,ymm6,12 |
| vpxor ymm6,ymm6,ymm3 |
| vpaddd ymm2,ymm2,ymm6 |
| vpxor ymm14,ymm14,ymm2 |
| vpshufb ymm14,ymm14,YMMWORD[$L$rol8] |
| vpaddd ymm10,ymm10,ymm14 |
| vpxor ymm6,ymm6,ymm10 |
| vpslld ymm3,ymm6,7 |
| vpsrld ymm6,ymm6,25 |
| vpxor ymm6,ymm6,ymm3 |
| vpalignr ymm14,ymm14,ymm14,4 |
| vpalignr ymm10,ymm10,ymm10,8 |
| vpalignr ymm6,ymm6,ymm6,12 |
| |
| dec r10 |
| jne NEAR $L$seal_avx2_320_rounds |
| vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] |
| vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] |
| vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] |
| vpaddd ymm4,ymm4,ymm7 |
| vpaddd ymm5,ymm5,ymm7 |
| vpaddd ymm6,ymm6,ymm7 |
| vpaddd ymm8,ymm8,ymm11 |
| vpaddd ymm9,ymm9,ymm11 |
| vpaddd ymm10,ymm10,ymm11 |
| vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] |
| vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] |
| vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] |
| vperm2i128 ymm3,ymm4,ymm0,0x02 |
| |
| vpand ymm3,ymm3,YMMWORD[$L$clamp] |
| vmovdqa YMMWORD[(160+0)+rbp],ymm3 |
| |
| vperm2i128 ymm0,ymm4,ymm0,0x13 |
| vperm2i128 ymm4,ymm12,ymm8,0x13 |
| vperm2i128 ymm8,ymm5,ymm1,0x02 |
| vperm2i128 ymm12,ymm13,ymm9,0x02 |
| vperm2i128 ymm1,ymm5,ymm1,0x13 |
| vperm2i128 ymm5,ymm13,ymm9,0x13 |
| vperm2i128 ymm9,ymm6,ymm2,0x02 |
| vperm2i128 ymm13,ymm14,ymm10,0x02 |
| vperm2i128 ymm2,ymm6,ymm2,0x13 |
| vperm2i128 ymm6,ymm14,ymm10,0x13 |
| jmp NEAR $L$seal_avx2_short |
| |
| $L$seal_avx2_192: |
| vmovdqa ymm1,ymm0 |
| vmovdqa ymm2,ymm0 |
| vmovdqa ymm5,ymm4 |
| vmovdqa ymm6,ymm4 |
| vmovdqa ymm9,ymm8 |
| vmovdqa ymm10,ymm8 |
| vpaddd ymm13,ymm12,YMMWORD[$L$avx2_inc] |
| vmovdqa ymm11,ymm12 |
| vmovdqa ymm15,ymm13 |
| mov r10,10 |
| $L$seal_avx2_192_rounds: |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm12,ymm12,YMMWORD[$L$rol16] |
| vpaddd ymm8,ymm8,ymm12 |
| vpxor ymm4,ymm4,ymm8 |
| vpsrld ymm3,ymm4,20 |
| vpslld ymm4,ymm4,12 |
| vpxor ymm4,ymm4,ymm3 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm12,ymm12,YMMWORD[$L$rol8] |
| vpaddd ymm8,ymm8,ymm12 |
| vpxor ymm4,ymm4,ymm8 |
| vpslld ymm3,ymm4,7 |
| vpsrld ymm4,ymm4,25 |
| vpxor ymm4,ymm4,ymm3 |
| vpalignr ymm12,ymm12,ymm12,12 |
| vpalignr ymm8,ymm8,ymm8,8 |
| vpalignr ymm4,ymm4,ymm4,4 |
| vpaddd ymm1,ymm1,ymm5 |
| vpxor ymm13,ymm13,ymm1 |
| vpshufb ymm13,ymm13,YMMWORD[$L$rol16] |
| vpaddd ymm9,ymm9,ymm13 |
| vpxor ymm5,ymm5,ymm9 |
| vpsrld ymm3,ymm5,20 |
| vpslld ymm5,ymm5,12 |
| vpxor ymm5,ymm5,ymm3 |
| vpaddd ymm1,ymm1,ymm5 |
| vpxor ymm13,ymm13,ymm1 |
| vpshufb ymm13,ymm13,YMMWORD[$L$rol8] |
| vpaddd ymm9,ymm9,ymm13 |
| vpxor ymm5,ymm5,ymm9 |
| vpslld ymm3,ymm5,7 |
| vpsrld ymm5,ymm5,25 |
| vpxor ymm5,ymm5,ymm3 |
| vpalignr ymm13,ymm13,ymm13,12 |
| vpalignr ymm9,ymm9,ymm9,8 |
| vpalignr ymm5,ymm5,ymm5,4 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm12,ymm12,YMMWORD[$L$rol16] |
| vpaddd ymm8,ymm8,ymm12 |
| vpxor ymm4,ymm4,ymm8 |
| vpsrld ymm3,ymm4,20 |
| vpslld ymm4,ymm4,12 |
| vpxor ymm4,ymm4,ymm3 |
| vpaddd ymm0,ymm0,ymm4 |
| vpxor ymm12,ymm12,ymm0 |
| vpshufb ymm12,ymm12,YMMWORD[$L$rol8] |
| vpaddd ymm8,ymm8,ymm12 |
| vpxor ymm4,ymm4,ymm8 |
| vpslld ymm3,ymm4,7 |
| vpsrld ymm4,ymm4,25 |
| vpxor ymm4,ymm4,ymm3 |
| vpalignr ymm12,ymm12,ymm12,4 |
| vpalignr ymm8,ymm8,ymm8,8 |
| vpalignr ymm4,ymm4,ymm4,12 |
| vpaddd ymm1,ymm1,ymm5 |
| vpxor ymm13,ymm13,ymm1 |
| vpshufb ymm13,ymm13,YMMWORD[$L$rol16] |
| vpaddd ymm9,ymm9,ymm13 |
| vpxor ymm5,ymm5,ymm9 |
| vpsrld ymm3,ymm5,20 |
| vpslld ymm5,ymm5,12 |
| vpxor ymm5,ymm5,ymm3 |
| vpaddd ymm1,ymm1,ymm5 |
| vpxor ymm13,ymm13,ymm1 |
| vpshufb ymm13,ymm13,YMMWORD[$L$rol8] |
| vpaddd ymm9,ymm9,ymm13 |
| vpxor ymm5,ymm5,ymm9 |
| vpslld ymm3,ymm5,7 |
| vpsrld ymm5,ymm5,25 |
| vpxor ymm5,ymm5,ymm3 |
| vpalignr ymm13,ymm13,ymm13,4 |
| vpalignr ymm9,ymm9,ymm9,8 |
| vpalignr ymm5,ymm5,ymm5,12 |
| |
| dec r10 |
| jne NEAR $L$seal_avx2_192_rounds |
| vpaddd ymm0,ymm0,ymm2 |
| vpaddd ymm1,ymm1,ymm2 |
| vpaddd ymm4,ymm4,ymm6 |
| vpaddd ymm5,ymm5,ymm6 |
| vpaddd ymm8,ymm8,ymm10 |
| vpaddd ymm9,ymm9,ymm10 |
| vpaddd ymm12,ymm12,ymm11 |
| vpaddd ymm13,ymm13,ymm15 |
| vperm2i128 ymm3,ymm4,ymm0,0x02 |
| |
| vpand ymm3,ymm3,YMMWORD[$L$clamp] |
| vmovdqa YMMWORD[(160+0)+rbp],ymm3 |
| |
| vperm2i128 ymm0,ymm4,ymm0,0x13 |
| vperm2i128 ymm4,ymm12,ymm8,0x13 |
| vperm2i128 ymm8,ymm5,ymm1,0x02 |
| vperm2i128 ymm12,ymm13,ymm9,0x02 |
| vperm2i128 ymm1,ymm5,ymm1,0x13 |
| vperm2i128 ymm5,ymm13,ymm9,0x13 |
| $L$seal_avx2_short: |
| mov r8,r8 |
| call poly_hash_ad_internal |
| xor rcx,rcx |
| $L$seal_avx2_short_hash_remainder: |
| cmp rcx,16 |
| jb NEAR $L$seal_avx2_short_loop |
| add r10,QWORD[((0+0))+rdi] |
| adc r11,QWORD[((8+0))+rdi] |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| sub rcx,16 |
| add rdi,16 |
| jmp NEAR $L$seal_avx2_short_hash_remainder |
| $L$seal_avx2_short_loop: |
| cmp rbx,32 |
| jb NEAR $L$seal_avx2_short_tail |
| sub rbx,32 |
| |
| vpxor ymm0,ymm0,YMMWORD[rsi] |
| vmovdqu YMMWORD[rdi],ymm0 |
| lea rsi,[32+rsi] |
| |
| add r10,QWORD[((0+0))+rdi] |
| adc r11,QWORD[((8+0))+rdi] |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| add r10,QWORD[((0+16))+rdi] |
| adc r11,QWORD[((8+16))+rdi] |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| lea rdi,[32+rdi] |
| |
| vmovdqa ymm0,ymm4 |
| vmovdqa ymm4,ymm8 |
| vmovdqa ymm8,ymm12 |
| vmovdqa ymm12,ymm1 |
| vmovdqa ymm1,ymm5 |
| vmovdqa ymm5,ymm9 |
| vmovdqa ymm9,ymm13 |
| vmovdqa ymm13,ymm2 |
| vmovdqa ymm2,ymm6 |
| jmp NEAR $L$seal_avx2_short_loop |
| $L$seal_avx2_short_tail: |
| cmp rbx,16 |
| jb NEAR $L$seal_avx2_exit |
| sub rbx,16 |
| vpxor xmm3,xmm0,XMMWORD[rsi] |
| vmovdqu XMMWORD[rdi],xmm3 |
| lea rsi,[16+rsi] |
| add r10,QWORD[((0+0))+rdi] |
| adc r11,QWORD[((8+0))+rdi] |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| lea rdi,[16+rdi] |
| vextracti128 xmm0,ymm0,1 |
| $L$seal_avx2_exit: |
| vzeroupper |
| jmp NEAR $L$seal_sse_tail_16 |
| |
| $L$SEH_end_chacha20_poly1305_seal_avx2: |
| %else |
| ; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738 |
| ret |
| %endif |