| ; This file is generated from a similarly-named Perl script in the BoringSSL |
| ; source tree. Do not edit by hand. |
| |
| %ifidn __OUTPUT_FORMAT__, win64 |
| default rel |
| %define XMMWORD |
| %define YMMWORD |
| %define ZMMWORD |
| %define _CET_ENDBR |
| |
| %ifdef BORINGSSL_PREFIX |
| %include "boringssl_prefix_symbols_nasm.inc" |
| %endif |
; Read-only constant tables for the ChaCha20-Poly1305 code in this file.
; All multi-byte values are little-endian.
| section .rdata rdata align=8 |
| ALIGN 64 |
| chacha20_poly1305_constants: |
; The 16 ASCII bytes "expand 32-byte k", stored twice (32 bytes).
; The SSE code in this file loads one 16-byte copy as the first state row.
; NOTE(review): the second copy is presumably there for 32-byte loads by
; AVX2 code that is not visible in this part of the file -- confirm.
| $L$chacha20_consts: |
| DB 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k' |
| DB 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k' |
; pshufb control bytes, stored twice. Within every 32-bit lane the
; destination bytes 0..3 are taken from source bytes 3,0,1,2, which
; rotates each dword left by 8 bits.
| $L$rol8: |
| DB 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14 |
| DB 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14 |
; pshufb control bytes, stored twice. Within every 32-bit lane the
; destination bytes 0..3 are taken from source bytes 2,3,0,1, which
; rotates each dword by 16 bits.
| $L$rol16: |
| DB 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13 |
| DB 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13 |
; Four zero dwords. Not referenced in the visible part of this file.
; NOTE(review): together with $L$sse_inc, which follows immediately, this
; forms the 32-byte pattern {0,0,0,0,1,0,0,0}; presumably AVX2 code reads
; it as one vector -- confirm.
| $L$avx2_init: |
| DD 0,0,0,0 |
; Used with paddd: adds 1 to the lowest dword of a 16-byte row and leaves
; the other three dwords unchanged. The SSE code in this file applies it
; to the fourth ChaCha20 state row to step the block counter.
| $L$sse_inc: |
| DD 1,0,0,0 |
; Adds 2 to the lowest dword of each 16-byte half of a 32-byte vector.
; Not referenced in the visible part of this file (presumably used by
; AVX2 code -- confirm).
| $L$avx2_inc: |
| DD 2,0,0,0,2,0,0,0 |
; First 16 bytes: the mask 0x0ffffffc0ffffffc0ffffffc0fffffff, which the
; open routine in this file ANDs (pand) onto the Poly1305 multiplier
; before storing it at [160+rbp].
; Second 16 bytes: all ones. NOTE(review): presumably so that a 32-byte
; AND clamps the first half and passes the second half through unchanged
; -- confirm against the AVX2 code.
| $L$clamp: |
| DQ 0x0FFFFFFC0FFFFFFF,0x0FFFFFFC0FFFFFFC |
| DQ 0xFFFFFFFFFFFFFFFF,0xFFFFFFFFFFFFFFFF |
| ALIGN 16 |
; Sixteen 16-byte masks. Mask k (k = 0..15) has its first k+1 bytes set to
; 0xff and the remaining bytes zero, so ANDing with it keeps the low k+1
; bytes of a 16-byte value. Not referenced in the visible part of this
; file.
| $L$and_masks: |
| DB 0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 |
| DB 0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 |
| DB 0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 |
| DB 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 |
| DB 0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 |
| DB 0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 |
| DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 |
| DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 |
| DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00 |
| DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00 |
| DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00 |
| DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00 |
| DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00 |
| DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00 |
| DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00 |
| DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff |
| section .text code align=64 |
| |
| |
| |
| ALIGN 64 |
;-----------------------------------------------------------------------
; poly_hash_ad_internal
;
; Absorbs the additional data (AD) into a freshly zeroed Poly1305
; accumulator. Called from chacha20_poly1305_open_nohw in this file; it
; uses a private register convention rather than the Win64 ABI.
;
; In:    rcx = pointer to the AD bytes
;        r8  = AD length in bytes
;        rbp = frame base; QWORD[160+rbp] and QWORD[168+rbp] are the low
;              and high 64 bits of the multiplier r (the caller in this
;              file stores the clamped value there before calling)
; Out:   r10 (low), r11, r12 (high) = accumulator after all AD blocks
; Clobb: rax, rdx, r9, r13, r14, r15, flags. The general path also
;        advances rcx past the whole 16-byte blocks and counts r8 down
;        to 0; the 13-byte fast path leaves rcx and r8 unchanged.
;
; For every 16-byte block: acc += block + 2^128, then acc = acc * r with
; a partial reduction modulo 2^130 - 5. A trailing partial block is
; zero-extended to 16 bytes before it is added. The multiply-and-reduce
; sequence is expanded inline three times (fast path, loop, tail).
;-----------------------------------------------------------------------
| poly_hash_ad_internal: |
| |
| |
; acc = 0
| xor r10,r10 |
| xor r11,r11 |
| xor r12,r12 |
; Lengths other than exactly 13 take the general loop.
| cmp r8,13 |
| jne NEAR $L$hash_ad_loop |
| $L$poly_fast_tls_ad: |
| |
; 13-byte fast path (the label name suggests the TLS record AD length).
; r10 = bytes 0..7. r11 = bytes 8..12: the load covers bytes 5..12 and
; the shift discards bytes 5..7, so nothing past offset 12 is read.
; r12 = 1 supplies the 2^128 bit for this single zero-padded block.
| mov r10,QWORD[rcx] |
| mov r11,QWORD[5+rcx] |
| shr r11,24 |
| mov r12,1 |
; Multiply acc (r10, r11, r12) by r. First the r0 = QWORD[160+rbp]
; partial products:
;   r13 = lo(acc0*r0)
;   r14 = hi(acc0*r0) + lo(acc1*r0)
;   r15 = acc2*r0 + hi(acc1*r0) + carry
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
; Then the r1 = QWORD[168+rbp] partial products, one limb higher:
;   r14 += lo(acc0*r1)
;   r15 += lo(acc1*r1) + hi(acc0*r1) + carry
;   r9   = acc2*r1 + hi(acc1*r1) + carries
; r10 is reused as a temporary for hi(acc0*r1) here.
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
; Reduce. The product is r13 (low), r14, r15, r9 (high). Keep its low
; 130 bits as the new acc (r10 = r13, r11 = r14, r12 = r15 & 3) and add
; 5*H, where H = product >> 130. 5*H is formed as 4*H (r15 & -4 with r9)
; plus H (r15:r9 shifted right by 2).
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| ret |
| $L$hash_ad_loop: |
| |
; General path: consume whole 16-byte blocks while r8 >= 16.
| cmp r8,16 |
| jb NEAR $L$hash_ad_tail |
; acc += block; the final adc adds 1 (the 2^128 bit) plus the carry.
| add r10,QWORD[((0+0))+rcx] |
| adc r11,QWORD[((8+0))+rcx] |
| adc r12,1 |
; acc = acc * r, partially reduced: same sequence as in the fast path.
; r0 partial products into r13, r14, r15.
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
; r1 partial products into r14, r15, r9.
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
; Fold everything above bit 130 back in, multiplied by 5.
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
; Advance to the next block.
| lea rcx,[16+rcx] |
| sub r8,16 |
| jmp NEAR $L$hash_ad_loop |
| $L$hash_ad_tail: |
; Fewer than 16 bytes remain; nothing left to do when r8 == 0.
| cmp r8,0 |
| je NEAR $L$hash_ad_done |
| |
; Gather the remaining 1..15 bytes into r14:r13 as a zero-extended
; little-endian value. rcx is moved to one past the last byte and the
; bytes are shifted in from last to first, so only the r8 remaining
; bytes are read.
| xor r13,r13 |
| xor r14,r14 |
| xor r15,r15 |
| add rcx,r8 |
| $L$hash_ad_tail_loop: |
| shld r14,r13,8 |
| shl r13,8 |
| movzx r15,BYTE[((-1))+rcx] |
| xor r13,r15 |
| dec rcx |
| dec r8 |
| jne NEAR $L$hash_ad_tail_loop |
| |
; acc += padded tail block; as for full blocks, 1 is added to the top
; limb (the 2^128 bit) together with the carry.
| add r10,r13 |
| adc r11,r14 |
| adc r12,1 |
; acc = acc * r, partially reduced: same sequence as in the fast path.
; r0 partial products into r13, r14, r15.
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
; r1 partial products into r14, r15, r9.
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
; Fold everything above bit 130 back in, multiplied by 5.
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| |
; Accumulator is returned in r10, r11, r12.
| $L$hash_ad_done: |
| ret |
| |
| |
| |
| global chacha20_poly1305_open_nohw |
| |
| ALIGN 64 |
| chacha20_poly1305_open_nohw: |
| mov QWORD[8+rsp],rdi ;WIN64 prologue |
| mov QWORD[16+rsp],rsi |
| mov rax,rsp |
| $L$SEH_begin_chacha20_poly1305_open_nohw: |
| mov rdi,rcx |
| mov rsi,rdx |
| mov rdx,r8 |
| mov rcx,r9 |
| mov r8,QWORD[40+rsp] |
| mov r9,QWORD[48+rsp] |
| |
| |
| |
| _CET_ENDBR |
| push rbp |
| |
| push rbx |
| |
| push r12 |
| |
| push r13 |
| |
| push r14 |
| |
| push r15 |
| |
| |
| |
| push r9 |
| |
| sub rsp,288 + 160 + 32 |
| |
| |
| lea rbp,[32+rsp] |
| and rbp,-32 |
| |
| movaps XMMWORD[(0+0)+rbp],xmm6 |
| movaps XMMWORD[(16+0)+rbp],xmm7 |
| movaps XMMWORD[(32+0)+rbp],xmm8 |
| movaps XMMWORD[(48+0)+rbp],xmm9 |
| movaps XMMWORD[(64+0)+rbp],xmm10 |
| movaps XMMWORD[(80+0)+rbp],xmm11 |
| movaps XMMWORD[(96+0)+rbp],xmm12 |
| movaps XMMWORD[(112+0)+rbp],xmm13 |
| movaps XMMWORD[(128+0)+rbp],xmm14 |
| movaps XMMWORD[(144+0)+rbp],xmm15 |
| |
| mov rbx,rdx |
| mov QWORD[((0+160+32))+rbp],r8 |
| mov QWORD[((8+160+32))+rbp],rbx |
| |
| cmp rbx,128 |
| jbe NEAR $L$open_sse_128 |
| |
| movdqa xmm0,XMMWORD[$L$chacha20_consts] |
| movdqu xmm4,XMMWORD[r9] |
| movdqu xmm8,XMMWORD[16+r9] |
| movdqu xmm12,XMMWORD[32+r9] |
| |
| movdqa xmm7,xmm12 |
| |
| movdqa XMMWORD[(160+48)+rbp],xmm4 |
| movdqa XMMWORD[(160+64)+rbp],xmm8 |
| movdqa XMMWORD[(160+96)+rbp],xmm12 |
| mov r10,10 |
| $L$open_sse_init_rounds: |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol16] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,12 |
| psrld xmm4,20 |
| pxor xmm4,xmm3 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol8] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,7 |
| psrld xmm4,25 |
| pxor xmm4,xmm3 |
| DB 102,15,58,15,228,4 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,12 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol16] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,12 |
| psrld xmm4,20 |
| pxor xmm4,xmm3 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol8] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,7 |
| psrld xmm4,25 |
| pxor xmm4,xmm3 |
| DB 102,15,58,15,228,12 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,4 |
| |
| dec r10 |
| jne NEAR $L$open_sse_init_rounds |
| |
| paddd xmm0,XMMWORD[$L$chacha20_consts] |
| paddd xmm4,XMMWORD[((160+48))+rbp] |
| |
| pand xmm0,XMMWORD[$L$clamp] |
| movdqa XMMWORD[(160+0)+rbp],xmm0 |
| movdqa XMMWORD[(160+16)+rbp],xmm4 |
| |
| mov r8,r8 |
| call poly_hash_ad_internal |
| $L$open_sse_main_loop: |
| cmp rbx,16*16 |
| jb NEAR $L$open_sse_tail |
| |
| movdqa xmm0,XMMWORD[$L$chacha20_consts] |
| movdqa xmm4,XMMWORD[((160+48))+rbp] |
| movdqa xmm8,XMMWORD[((160+64))+rbp] |
| movdqa xmm1,xmm0 |
| movdqa xmm5,xmm4 |
| movdqa xmm9,xmm8 |
| movdqa xmm2,xmm0 |
| movdqa xmm6,xmm4 |
| movdqa xmm10,xmm8 |
| movdqa xmm3,xmm0 |
| movdqa xmm7,xmm4 |
| movdqa xmm11,xmm8 |
| movdqa xmm15,XMMWORD[((160+96))+rbp] |
| paddd xmm15,XMMWORD[$L$sse_inc] |
| movdqa xmm14,xmm15 |
| paddd xmm14,XMMWORD[$L$sse_inc] |
| movdqa xmm13,xmm14 |
| paddd xmm13,XMMWORD[$L$sse_inc] |
| movdqa xmm12,xmm13 |
| paddd xmm12,XMMWORD[$L$sse_inc] |
| movdqa XMMWORD[(160+96)+rbp],xmm12 |
| movdqa XMMWORD[(160+112)+rbp],xmm13 |
| movdqa XMMWORD[(160+128)+rbp],xmm14 |
| movdqa XMMWORD[(160+144)+rbp],xmm15 |
| |
| |
| |
| mov rcx,4 |
| mov r8,rsi |
| $L$open_sse_main_loop_rounds: |
| movdqa XMMWORD[(160+80)+rbp],xmm8 |
| movdqa xmm8,XMMWORD[$L$rol16] |
| paddd xmm3,xmm7 |
| paddd xmm2,xmm6 |
| paddd xmm1,xmm5 |
| paddd xmm0,xmm4 |
| pxor xmm15,xmm3 |
| pxor xmm14,xmm2 |
| pxor xmm13,xmm1 |
| pxor xmm12,xmm0 |
| DB 102,69,15,56,0,248 |
| DB 102,69,15,56,0,240 |
| DB 102,69,15,56,0,232 |
| DB 102,69,15,56,0,224 |
| movdqa xmm8,XMMWORD[((160+80))+rbp] |
| paddd xmm11,xmm15 |
| paddd xmm10,xmm14 |
| paddd xmm9,xmm13 |
| paddd xmm8,xmm12 |
| pxor xmm7,xmm11 |
| add r10,QWORD[((0+0))+r8] |
| adc r11,QWORD[((8+0))+r8] |
| adc r12,1 |
| |
| lea r8,[16+r8] |
| pxor xmm6,xmm10 |
| pxor xmm5,xmm9 |
| pxor xmm4,xmm8 |
| movdqa XMMWORD[(160+80)+rbp],xmm8 |
| movdqa xmm8,xmm7 |
| psrld xmm8,20 |
| pslld xmm7,32-20 |
| pxor xmm7,xmm8 |
| movdqa xmm8,xmm6 |
| psrld xmm8,20 |
| pslld xmm6,32-20 |
| pxor xmm6,xmm8 |
| movdqa xmm8,xmm5 |
| psrld xmm8,20 |
| pslld xmm5,32-20 |
| pxor xmm5,xmm8 |
| movdqa xmm8,xmm4 |
| psrld xmm8,20 |
| pslld xmm4,32-20 |
| pxor xmm4,xmm8 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| movdqa xmm8,XMMWORD[$L$rol8] |
| paddd xmm3,xmm7 |
| paddd xmm2,xmm6 |
| paddd xmm1,xmm5 |
| paddd xmm0,xmm4 |
| pxor xmm15,xmm3 |
| pxor xmm14,xmm2 |
| pxor xmm13,xmm1 |
| pxor xmm12,xmm0 |
| DB 102,69,15,56,0,248 |
| DB 102,69,15,56,0,240 |
| DB 102,69,15,56,0,232 |
| DB 102,69,15,56,0,224 |
| movdqa xmm8,XMMWORD[((160+80))+rbp] |
| paddd xmm11,xmm15 |
| paddd xmm10,xmm14 |
| paddd xmm9,xmm13 |
| paddd xmm8,xmm12 |
| pxor xmm7,xmm11 |
| pxor xmm6,xmm10 |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| pxor xmm5,xmm9 |
| pxor xmm4,xmm8 |
| movdqa XMMWORD[(160+80)+rbp],xmm8 |
| movdqa xmm8,xmm7 |
| psrld xmm8,25 |
| pslld xmm7,32-25 |
| pxor xmm7,xmm8 |
| movdqa xmm8,xmm6 |
| psrld xmm8,25 |
| pslld xmm6,32-25 |
| pxor xmm6,xmm8 |
| movdqa xmm8,xmm5 |
| psrld xmm8,25 |
| pslld xmm5,32-25 |
| pxor xmm5,xmm8 |
| movdqa xmm8,xmm4 |
| psrld xmm8,25 |
| pslld xmm4,32-25 |
| pxor xmm4,xmm8 |
| movdqa xmm8,XMMWORD[((160+80))+rbp] |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| DB 102,15,58,15,255,4 |
| DB 102,69,15,58,15,219,8 |
| DB 102,69,15,58,15,255,12 |
| DB 102,15,58,15,246,4 |
| DB 102,69,15,58,15,210,8 |
| DB 102,69,15,58,15,246,12 |
| DB 102,15,58,15,237,4 |
| DB 102,69,15,58,15,201,8 |
| DB 102,69,15,58,15,237,12 |
| DB 102,15,58,15,228,4 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,12 |
| movdqa XMMWORD[(160+80)+rbp],xmm8 |
| movdqa xmm8,XMMWORD[$L$rol16] |
| paddd xmm3,xmm7 |
| paddd xmm2,xmm6 |
| paddd xmm1,xmm5 |
| paddd xmm0,xmm4 |
| pxor xmm15,xmm3 |
| pxor xmm14,xmm2 |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| pxor xmm13,xmm1 |
| pxor xmm12,xmm0 |
| DB 102,69,15,56,0,248 |
| DB 102,69,15,56,0,240 |
| DB 102,69,15,56,0,232 |
| DB 102,69,15,56,0,224 |
| movdqa xmm8,XMMWORD[((160+80))+rbp] |
| paddd xmm11,xmm15 |
| paddd xmm10,xmm14 |
| paddd xmm9,xmm13 |
| paddd xmm8,xmm12 |
| pxor xmm7,xmm11 |
| pxor xmm6,xmm10 |
| pxor xmm5,xmm9 |
| pxor xmm4,xmm8 |
| movdqa XMMWORD[(160+80)+rbp],xmm8 |
| movdqa xmm8,xmm7 |
| psrld xmm8,20 |
| pslld xmm7,32-20 |
| pxor xmm7,xmm8 |
| movdqa xmm8,xmm6 |
| psrld xmm8,20 |
| pslld xmm6,32-20 |
| pxor xmm6,xmm8 |
| movdqa xmm8,xmm5 |
| psrld xmm8,20 |
| pslld xmm5,32-20 |
| pxor xmm5,xmm8 |
| movdqa xmm8,xmm4 |
| psrld xmm8,20 |
| pslld xmm4,32-20 |
| pxor xmm4,xmm8 |
| movdqa xmm8,XMMWORD[$L$rol8] |
| paddd xmm3,xmm7 |
| paddd xmm2,xmm6 |
| paddd xmm1,xmm5 |
| paddd xmm0,xmm4 |
| pxor xmm15,xmm3 |
| pxor xmm14,xmm2 |
| pxor xmm13,xmm1 |
| pxor xmm12,xmm0 |
| DB 102,69,15,56,0,248 |
| DB 102,69,15,56,0,240 |
| DB 102,69,15,56,0,232 |
| DB 102,69,15,56,0,224 |
| movdqa xmm8,XMMWORD[((160+80))+rbp] |
| paddd xmm11,xmm15 |
| paddd xmm10,xmm14 |
| paddd xmm9,xmm13 |
| paddd xmm8,xmm12 |
| pxor xmm7,xmm11 |
| pxor xmm6,xmm10 |
| pxor xmm5,xmm9 |
| pxor xmm4,xmm8 |
| movdqa XMMWORD[(160+80)+rbp],xmm8 |
| movdqa xmm8,xmm7 |
| psrld xmm8,25 |
| pslld xmm7,32-25 |
| pxor xmm7,xmm8 |
| movdqa xmm8,xmm6 |
| psrld xmm8,25 |
| pslld xmm6,32-25 |
| pxor xmm6,xmm8 |
| movdqa xmm8,xmm5 |
| psrld xmm8,25 |
| pslld xmm5,32-25 |
| pxor xmm5,xmm8 |
| movdqa xmm8,xmm4 |
| psrld xmm8,25 |
| pslld xmm4,32-25 |
| pxor xmm4,xmm8 |
| movdqa xmm8,XMMWORD[((160+80))+rbp] |
| DB 102,15,58,15,255,12 |
| DB 102,69,15,58,15,219,8 |
| DB 102,69,15,58,15,255,4 |
| DB 102,15,58,15,246,12 |
| DB 102,69,15,58,15,210,8 |
| DB 102,69,15,58,15,246,4 |
| DB 102,15,58,15,237,12 |
| DB 102,69,15,58,15,201,8 |
| DB 102,69,15,58,15,237,4 |
| DB 102,15,58,15,228,12 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,4 |
| |
| dec rcx |
| jge NEAR $L$open_sse_main_loop_rounds |
| add r10,QWORD[((0+0))+r8] |
| adc r11,QWORD[((8+0))+r8] |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| lea r8,[16+r8] |
| cmp rcx,-6 |
| jg NEAR $L$open_sse_main_loop_rounds |
| paddd xmm3,XMMWORD[$L$chacha20_consts] |
| paddd xmm7,XMMWORD[((160+48))+rbp] |
| paddd xmm11,XMMWORD[((160+64))+rbp] |
| paddd xmm15,XMMWORD[((160+144))+rbp] |
| paddd xmm2,XMMWORD[$L$chacha20_consts] |
| paddd xmm6,XMMWORD[((160+48))+rbp] |
| paddd xmm10,XMMWORD[((160+64))+rbp] |
| paddd xmm14,XMMWORD[((160+128))+rbp] |
| paddd xmm1,XMMWORD[$L$chacha20_consts] |
| paddd xmm5,XMMWORD[((160+48))+rbp] |
| paddd xmm9,XMMWORD[((160+64))+rbp] |
| paddd xmm13,XMMWORD[((160+112))+rbp] |
| paddd xmm0,XMMWORD[$L$chacha20_consts] |
| paddd xmm4,XMMWORD[((160+48))+rbp] |
| paddd xmm8,XMMWORD[((160+64))+rbp] |
| paddd xmm12,XMMWORD[((160+96))+rbp] |
| movdqa XMMWORD[(160+80)+rbp],xmm12 |
| movdqu xmm12,XMMWORD[((0 + 0))+rsi] |
| pxor xmm12,xmm3 |
| movdqu XMMWORD[(0 + 0)+rdi],xmm12 |
| movdqu xmm12,XMMWORD[((16 + 0))+rsi] |
| pxor xmm12,xmm7 |
| movdqu XMMWORD[(16 + 0)+rdi],xmm12 |
| movdqu xmm12,XMMWORD[((32 + 0))+rsi] |
| pxor xmm12,xmm11 |
| movdqu XMMWORD[(32 + 0)+rdi],xmm12 |
| movdqu xmm12,XMMWORD[((48 + 0))+rsi] |
| pxor xmm12,xmm15 |
| movdqu XMMWORD[(48 + 0)+rdi],xmm12 |
| movdqu xmm3,XMMWORD[((0 + 64))+rsi] |
| movdqu xmm7,XMMWORD[((16 + 64))+rsi] |
| movdqu xmm11,XMMWORD[((32 + 64))+rsi] |
| movdqu xmm15,XMMWORD[((48 + 64))+rsi] |
| pxor xmm2,xmm3 |
| pxor xmm6,xmm7 |
| pxor xmm10,xmm11 |
| pxor xmm15,xmm14 |
| movdqu XMMWORD[(0 + 64)+rdi],xmm2 |
| movdqu XMMWORD[(16 + 64)+rdi],xmm6 |
| movdqu XMMWORD[(32 + 64)+rdi],xmm10 |
| movdqu XMMWORD[(48 + 64)+rdi],xmm15 |
| movdqu xmm3,XMMWORD[((0 + 128))+rsi] |
| movdqu xmm7,XMMWORD[((16 + 128))+rsi] |
| movdqu xmm11,XMMWORD[((32 + 128))+rsi] |
| movdqu xmm15,XMMWORD[((48 + 128))+rsi] |
| pxor xmm1,xmm3 |
| pxor xmm5,xmm7 |
| pxor xmm9,xmm11 |
| pxor xmm15,xmm13 |
| movdqu XMMWORD[(0 + 128)+rdi],xmm1 |
| movdqu XMMWORD[(16 + 128)+rdi],xmm5 |
| movdqu XMMWORD[(32 + 128)+rdi],xmm9 |
| movdqu XMMWORD[(48 + 128)+rdi],xmm15 |
| movdqu xmm3,XMMWORD[((0 + 192))+rsi] |
| movdqu xmm7,XMMWORD[((16 + 192))+rsi] |
| movdqu xmm11,XMMWORD[((32 + 192))+rsi] |
| movdqu xmm15,XMMWORD[((48 + 192))+rsi] |
| pxor xmm0,xmm3 |
| pxor xmm4,xmm7 |
| pxor xmm8,xmm11 |
| pxor xmm15,XMMWORD[((160+80))+rbp] |
| movdqu XMMWORD[(0 + 192)+rdi],xmm0 |
| movdqu XMMWORD[(16 + 192)+rdi],xmm4 |
| movdqu XMMWORD[(32 + 192)+rdi],xmm8 |
| movdqu XMMWORD[(48 + 192)+rdi],xmm15 |
| |
| lea rsi,[256+rsi] |
| lea rdi,[256+rdi] |
| sub rbx,16*16 |
| jmp NEAR $L$open_sse_main_loop |
| $L$open_sse_tail: |
| |
| test rbx,rbx |
| jz NEAR $L$open_sse_finalize |
| cmp rbx,12*16 |
| ja NEAR $L$open_sse_tail_256 |
| cmp rbx,8*16 |
| ja NEAR $L$open_sse_tail_192 |
| cmp rbx,4*16 |
| ja NEAR $L$open_sse_tail_128 |
| movdqa xmm0,XMMWORD[$L$chacha20_consts] |
| movdqa xmm4,XMMWORD[((160+48))+rbp] |
| movdqa xmm8,XMMWORD[((160+64))+rbp] |
| movdqa xmm12,XMMWORD[((160+96))+rbp] |
| paddd xmm12,XMMWORD[$L$sse_inc] |
| movdqa XMMWORD[(160+96)+rbp],xmm12 |
| |
| xor r8,r8 |
| mov rcx,rbx |
| cmp rcx,16 |
| jb NEAR $L$open_sse_tail_64_rounds |
| $L$open_sse_tail_64_rounds_and_x1hash: |
| add r10,QWORD[((0+0))+r8*1+rsi] |
| adc r11,QWORD[((8+0))+r8*1+rsi] |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| sub rcx,16 |
| $L$open_sse_tail_64_rounds: |
| add r8,16 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol16] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,12 |
| psrld xmm4,20 |
| pxor xmm4,xmm3 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol8] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,7 |
| psrld xmm4,25 |
| pxor xmm4,xmm3 |
| DB 102,15,58,15,228,4 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,12 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol16] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,12 |
| psrld xmm4,20 |
| pxor xmm4,xmm3 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol8] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,7 |
| psrld xmm4,25 |
| pxor xmm4,xmm3 |
| DB 102,15,58,15,228,12 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,4 |
| |
| cmp rcx,16 |
| jae NEAR $L$open_sse_tail_64_rounds_and_x1hash |
| cmp r8,10*16 |
| jne NEAR $L$open_sse_tail_64_rounds |
| paddd xmm0,XMMWORD[$L$chacha20_consts] |
| paddd xmm4,XMMWORD[((160+48))+rbp] |
| paddd xmm8,XMMWORD[((160+64))+rbp] |
| paddd xmm12,XMMWORD[((160+96))+rbp] |
| |
| jmp NEAR $L$open_sse_tail_64_dec_loop |
| |
| $L$open_sse_tail_128: |
| movdqa xmm0,XMMWORD[$L$chacha20_consts] |
| movdqa xmm4,XMMWORD[((160+48))+rbp] |
| movdqa xmm8,XMMWORD[((160+64))+rbp] |
| movdqa xmm1,xmm0 |
| movdqa xmm5,xmm4 |
| movdqa xmm9,xmm8 |
| movdqa xmm13,XMMWORD[((160+96))+rbp] |
| paddd xmm13,XMMWORD[$L$sse_inc] |
| movdqa xmm12,xmm13 |
| paddd xmm12,XMMWORD[$L$sse_inc] |
| movdqa XMMWORD[(160+96)+rbp],xmm12 |
| movdqa XMMWORD[(160+112)+rbp],xmm13 |
| |
| mov rcx,rbx |
| and rcx,-16 |
| xor r8,r8 |
| $L$open_sse_tail_128_rounds_and_x1hash: |
| add r10,QWORD[((0+0))+r8*1+rsi] |
| adc r11,QWORD[((8+0))+r8*1+rsi] |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| $L$open_sse_tail_128_rounds: |
| add r8,16 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol16] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,12 |
| psrld xmm4,20 |
| pxor xmm4,xmm3 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol8] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,7 |
| psrld xmm4,25 |
| pxor xmm4,xmm3 |
| DB 102,15,58,15,228,4 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,12 |
| paddd xmm1,xmm5 |
| pxor xmm13,xmm1 |
| pshufb xmm13,XMMWORD[$L$rol16] |
| paddd xmm9,xmm13 |
| pxor xmm5,xmm9 |
| movdqa xmm3,xmm5 |
| pslld xmm3,12 |
| psrld xmm5,20 |
| pxor xmm5,xmm3 |
| paddd xmm1,xmm5 |
| pxor xmm13,xmm1 |
| pshufb xmm13,XMMWORD[$L$rol8] |
| paddd xmm9,xmm13 |
| pxor xmm5,xmm9 |
| movdqa xmm3,xmm5 |
| pslld xmm3,7 |
| psrld xmm5,25 |
| pxor xmm5,xmm3 |
| DB 102,15,58,15,237,4 |
| DB 102,69,15,58,15,201,8 |
| DB 102,69,15,58,15,237,12 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol16] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,12 |
| psrld xmm4,20 |
| pxor xmm4,xmm3 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol8] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,7 |
| psrld xmm4,25 |
| pxor xmm4,xmm3 |
| DB 102,15,58,15,228,12 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,4 |
| paddd xmm1,xmm5 |
| pxor xmm13,xmm1 |
| pshufb xmm13,XMMWORD[$L$rol16] |
| paddd xmm9,xmm13 |
| pxor xmm5,xmm9 |
| movdqa xmm3,xmm5 |
| pslld xmm3,12 |
| psrld xmm5,20 |
| pxor xmm5,xmm3 |
| paddd xmm1,xmm5 |
| pxor xmm13,xmm1 |
| pshufb xmm13,XMMWORD[$L$rol8] |
| paddd xmm9,xmm13 |
| pxor xmm5,xmm9 |
| movdqa xmm3,xmm5 |
| pslld xmm3,7 |
| psrld xmm5,25 |
| pxor xmm5,xmm3 |
| DB 102,15,58,15,237,12 |
| DB 102,69,15,58,15,201,8 |
| DB 102,69,15,58,15,237,4 |
| |
| cmp r8,rcx |
| jb NEAR $L$open_sse_tail_128_rounds_and_x1hash |
| cmp r8,10*16 |
| jne NEAR $L$open_sse_tail_128_rounds |
| paddd xmm1,XMMWORD[$L$chacha20_consts] |
| paddd xmm5,XMMWORD[((160+48))+rbp] |
| paddd xmm9,XMMWORD[((160+64))+rbp] |
| paddd xmm13,XMMWORD[((160+112))+rbp] |
| paddd xmm0,XMMWORD[$L$chacha20_consts] |
| paddd xmm4,XMMWORD[((160+48))+rbp] |
| paddd xmm8,XMMWORD[((160+64))+rbp] |
| paddd xmm12,XMMWORD[((160+96))+rbp] |
| movdqu xmm3,XMMWORD[((0 + 0))+rsi] |
| movdqu xmm7,XMMWORD[((16 + 0))+rsi] |
| movdqu xmm11,XMMWORD[((32 + 0))+rsi] |
| movdqu xmm15,XMMWORD[((48 + 0))+rsi] |
| pxor xmm1,xmm3 |
| pxor xmm5,xmm7 |
| pxor xmm9,xmm11 |
| pxor xmm15,xmm13 |
| movdqu XMMWORD[(0 + 0)+rdi],xmm1 |
| movdqu XMMWORD[(16 + 0)+rdi],xmm5 |
| movdqu XMMWORD[(32 + 0)+rdi],xmm9 |
| movdqu XMMWORD[(48 + 0)+rdi],xmm15 |
| |
| sub rbx,4*16 |
| lea rsi,[64+rsi] |
| lea rdi,[64+rdi] |
| jmp NEAR $L$open_sse_tail_64_dec_loop |
| |
| $L$open_sse_tail_192: |
| movdqa xmm0,XMMWORD[$L$chacha20_consts] |
| movdqa xmm4,XMMWORD[((160+48))+rbp] |
| movdqa xmm8,XMMWORD[((160+64))+rbp] |
| movdqa xmm1,xmm0 |
| movdqa xmm5,xmm4 |
| movdqa xmm9,xmm8 |
| movdqa xmm2,xmm0 |
| movdqa xmm6,xmm4 |
| movdqa xmm10,xmm8 |
| movdqa xmm14,XMMWORD[((160+96))+rbp] |
| paddd xmm14,XMMWORD[$L$sse_inc] |
| movdqa xmm13,xmm14 |
| paddd xmm13,XMMWORD[$L$sse_inc] |
| movdqa xmm12,xmm13 |
| paddd xmm12,XMMWORD[$L$sse_inc] |
| movdqa XMMWORD[(160+96)+rbp],xmm12 |
| movdqa XMMWORD[(160+112)+rbp],xmm13 |
| movdqa XMMWORD[(160+128)+rbp],xmm14 |
| |
| mov rcx,rbx |
| mov r8,10*16 |
| cmp rcx,10*16 |
| cmovg rcx,r8 |
| and rcx,-16 |
| xor r8,r8 |
| $L$open_sse_tail_192_rounds_and_x1hash: |
| add r10,QWORD[((0+0))+r8*1+rsi] |
| adc r11,QWORD[((8+0))+r8*1+rsi] |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| $L$open_sse_tail_192_rounds: |
| add r8,16 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol16] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,12 |
| psrld xmm4,20 |
| pxor xmm4,xmm3 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol8] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,7 |
| psrld xmm4,25 |
| pxor xmm4,xmm3 |
| DB 102,15,58,15,228,4 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,12 |
| paddd xmm1,xmm5 |
| pxor xmm13,xmm1 |
| pshufb xmm13,XMMWORD[$L$rol16] |
| paddd xmm9,xmm13 |
| pxor xmm5,xmm9 |
| movdqa xmm3,xmm5 |
| pslld xmm3,12 |
| psrld xmm5,20 |
| pxor xmm5,xmm3 |
| paddd xmm1,xmm5 |
| pxor xmm13,xmm1 |
| pshufb xmm13,XMMWORD[$L$rol8] |
| paddd xmm9,xmm13 |
| pxor xmm5,xmm9 |
| movdqa xmm3,xmm5 |
| pslld xmm3,7 |
| psrld xmm5,25 |
| pxor xmm5,xmm3 |
| DB 102,15,58,15,237,4 |
| DB 102,69,15,58,15,201,8 |
| DB 102,69,15,58,15,237,12 |
| paddd xmm2,xmm6 |
| pxor xmm14,xmm2 |
| pshufb xmm14,XMMWORD[$L$rol16] |
| paddd xmm10,xmm14 |
| pxor xmm6,xmm10 |
| movdqa xmm3,xmm6 |
| pslld xmm3,12 |
| psrld xmm6,20 |
| pxor xmm6,xmm3 |
| paddd xmm2,xmm6 |
| pxor xmm14,xmm2 |
| pshufb xmm14,XMMWORD[$L$rol8] |
| paddd xmm10,xmm14 |
| pxor xmm6,xmm10 |
| movdqa xmm3,xmm6 |
| pslld xmm3,7 |
| psrld xmm6,25 |
| pxor xmm6,xmm3 |
| DB 102,15,58,15,246,4 |
| DB 102,69,15,58,15,210,8 |
| DB 102,69,15,58,15,246,12 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol16] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,12 |
| psrld xmm4,20 |
| pxor xmm4,xmm3 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol8] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,7 |
| psrld xmm4,25 |
| pxor xmm4,xmm3 |
| DB 102,15,58,15,228,12 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,4 |
| paddd xmm1,xmm5 |
| pxor xmm13,xmm1 |
| pshufb xmm13,XMMWORD[$L$rol16] |
| paddd xmm9,xmm13 |
| pxor xmm5,xmm9 |
| movdqa xmm3,xmm5 |
| pslld xmm3,12 |
| psrld xmm5,20 |
| pxor xmm5,xmm3 |
| paddd xmm1,xmm5 |
| pxor xmm13,xmm1 |
| pshufb xmm13,XMMWORD[$L$rol8] |
| paddd xmm9,xmm13 |
| pxor xmm5,xmm9 |
| movdqa xmm3,xmm5 |
| pslld xmm3,7 |
| psrld xmm5,25 |
| pxor xmm5,xmm3 |
| DB 102,15,58,15,237,12 |
| DB 102,69,15,58,15,201,8 |
| DB 102,69,15,58,15,237,4 |
| paddd xmm2,xmm6 |
| pxor xmm14,xmm2 |
| pshufb xmm14,XMMWORD[$L$rol16] |
| paddd xmm10,xmm14 |
| pxor xmm6,xmm10 |
| movdqa xmm3,xmm6 |
| pslld xmm3,12 |
| psrld xmm6,20 |
| pxor xmm6,xmm3 |
| paddd xmm2,xmm6 |
| pxor xmm14,xmm2 |
| pshufb xmm14,XMMWORD[$L$rol8] |
| paddd xmm10,xmm14 |
| pxor xmm6,xmm10 |
| movdqa xmm3,xmm6 |
| pslld xmm3,7 |
| psrld xmm6,25 |
| pxor xmm6,xmm3 |
| DB 102,15,58,15,246,12 |
| DB 102,69,15,58,15,210,8 |
| DB 102,69,15,58,15,246,4 |
| |
| cmp r8,rcx |
| jb NEAR $L$open_sse_tail_192_rounds_and_x1hash |
| cmp r8,10*16 |
| jne NEAR $L$open_sse_tail_192_rounds |
| cmp rbx,11*16 |
| jb NEAR $L$open_sse_tail_192_finish |
| add r10,QWORD[((0+160))+rsi] |
| adc r11,QWORD[((8+160))+rsi] |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| cmp rbx,12*16 |
| jb NEAR $L$open_sse_tail_192_finish |
| add r10,QWORD[((0+176))+rsi] |
| adc r11,QWORD[((8+176))+rsi] |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| $L$open_sse_tail_192_finish: |
| paddd xmm2,XMMWORD[$L$chacha20_consts] |
| paddd xmm6,XMMWORD[((160+48))+rbp] |
| paddd xmm10,XMMWORD[((160+64))+rbp] |
| paddd xmm14,XMMWORD[((160+128))+rbp] |
| paddd xmm1,XMMWORD[$L$chacha20_consts] |
| paddd xmm5,XMMWORD[((160+48))+rbp] |
| paddd xmm9,XMMWORD[((160+64))+rbp] |
| paddd xmm13,XMMWORD[((160+112))+rbp] |
| paddd xmm0,XMMWORD[$L$chacha20_consts] |
| paddd xmm4,XMMWORD[((160+48))+rbp] |
| paddd xmm8,XMMWORD[((160+64))+rbp] |
| paddd xmm12,XMMWORD[((160+96))+rbp] |
| movdqu xmm3,XMMWORD[((0 + 0))+rsi] |
| movdqu xmm7,XMMWORD[((16 + 0))+rsi] |
| movdqu xmm11,XMMWORD[((32 + 0))+rsi] |
| movdqu xmm15,XMMWORD[((48 + 0))+rsi] |
| pxor xmm2,xmm3 |
| pxor xmm6,xmm7 |
| pxor xmm10,xmm11 |
| pxor xmm15,xmm14 |
| movdqu XMMWORD[(0 + 0)+rdi],xmm2 |
| movdqu XMMWORD[(16 + 0)+rdi],xmm6 |
| movdqu XMMWORD[(32 + 0)+rdi],xmm10 |
| movdqu XMMWORD[(48 + 0)+rdi],xmm15 |
| movdqu xmm3,XMMWORD[((0 + 64))+rsi] |
| movdqu xmm7,XMMWORD[((16 + 64))+rsi] |
| movdqu xmm11,XMMWORD[((32 + 64))+rsi] |
| movdqu xmm15,XMMWORD[((48 + 64))+rsi] |
| pxor xmm1,xmm3 |
| pxor xmm5,xmm7 |
| pxor xmm9,xmm11 |
| pxor xmm15,xmm13 |
| movdqu XMMWORD[(0 + 64)+rdi],xmm1 |
| movdqu XMMWORD[(16 + 64)+rdi],xmm5 |
| movdqu XMMWORD[(32 + 64)+rdi],xmm9 |
| movdqu XMMWORD[(48 + 64)+rdi],xmm15 |
| |
| sub rbx,8*16 |
| lea rsi,[128+rsi] |
| lea rdi,[128+rdi] |
| jmp NEAR $L$open_sse_tail_64_dec_loop |
| |
| $L$open_sse_tail_256: |
| movdqa xmm0,XMMWORD[$L$chacha20_consts] |
| movdqa xmm4,XMMWORD[((160+48))+rbp] |
| movdqa xmm8,XMMWORD[((160+64))+rbp] |
| movdqa xmm1,xmm0 |
| movdqa xmm5,xmm4 |
| movdqa xmm9,xmm8 |
| movdqa xmm2,xmm0 |
| movdqa xmm6,xmm4 |
| movdqa xmm10,xmm8 |
| movdqa xmm3,xmm0 |
| movdqa xmm7,xmm4 |
| movdqa xmm11,xmm8 |
| movdqa xmm15,XMMWORD[((160+96))+rbp] |
| paddd xmm15,XMMWORD[$L$sse_inc] |
| movdqa xmm14,xmm15 |
| paddd xmm14,XMMWORD[$L$sse_inc] |
| movdqa xmm13,xmm14 |
| paddd xmm13,XMMWORD[$L$sse_inc] |
| movdqa xmm12,xmm13 |
| paddd xmm12,XMMWORD[$L$sse_inc] |
| movdqa XMMWORD[(160+96)+rbp],xmm12 |
| movdqa XMMWORD[(160+112)+rbp],xmm13 |
| movdqa XMMWORD[(160+128)+rbp],xmm14 |
| movdqa XMMWORD[(160+144)+rbp],xmm15 |
| |
| xor r8,r8 |
| $L$open_sse_tail_256_rounds_and_x1hash: |
| add r10,QWORD[((0+0))+r8*1+rsi] |
| adc r11,QWORD[((8+0))+r8*1+rsi] |
| adc r12,1 |
| movdqa XMMWORD[(160+80)+rbp],xmm11 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol16] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm11,xmm4 |
| pslld xmm11,12 |
| psrld xmm4,20 |
| pxor xmm4,xmm11 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol8] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm11,xmm4 |
| pslld xmm11,7 |
| psrld xmm4,25 |
| pxor xmm4,xmm11 |
| DB 102,15,58,15,228,4 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,12 |
| paddd xmm1,xmm5 |
| pxor xmm13,xmm1 |
| pshufb xmm13,XMMWORD[$L$rol16] |
| paddd xmm9,xmm13 |
| pxor xmm5,xmm9 |
| movdqa xmm11,xmm5 |
| pslld xmm11,12 |
| psrld xmm5,20 |
| pxor xmm5,xmm11 |
| paddd xmm1,xmm5 |
| pxor xmm13,xmm1 |
| pshufb xmm13,XMMWORD[$L$rol8] |
| paddd xmm9,xmm13 |
| pxor xmm5,xmm9 |
| movdqa xmm11,xmm5 |
| pslld xmm11,7 |
| psrld xmm5,25 |
| pxor xmm5,xmm11 |
| DB 102,15,58,15,237,4 |
| DB 102,69,15,58,15,201,8 |
| DB 102,69,15,58,15,237,12 |
| paddd xmm2,xmm6 |
| pxor xmm14,xmm2 |
| pshufb xmm14,XMMWORD[$L$rol16] |
| paddd xmm10,xmm14 |
| pxor xmm6,xmm10 |
| movdqa xmm11,xmm6 |
| pslld xmm11,12 |
| psrld xmm6,20 |
| pxor xmm6,xmm11 |
| paddd xmm2,xmm6 |
| pxor xmm14,xmm2 |
| pshufb xmm14,XMMWORD[$L$rol8] |
| paddd xmm10,xmm14 |
| pxor xmm6,xmm10 |
| movdqa xmm11,xmm6 |
| pslld xmm11,7 |
| psrld xmm6,25 |
| pxor xmm6,xmm11 |
| DB 102,15,58,15,246,4 |
| DB 102,69,15,58,15,210,8 |
| DB 102,69,15,58,15,246,12 |
| movdqa xmm11,XMMWORD[((160+80))+rbp] |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| movdqa XMMWORD[(160+80)+rbp],xmm9 |
| paddd xmm3,xmm7 |
| pxor xmm15,xmm3 |
| pshufb xmm15,XMMWORD[$L$rol16] |
| paddd xmm11,xmm15 |
| pxor xmm7,xmm11 |
| movdqa xmm9,xmm7 |
| pslld xmm9,12 |
| psrld xmm7,20 |
| pxor xmm7,xmm9 |
| paddd xmm3,xmm7 |
| pxor xmm15,xmm3 |
| pshufb xmm15,XMMWORD[$L$rol8] |
| paddd xmm11,xmm15 |
| pxor xmm7,xmm11 |
| movdqa xmm9,xmm7 |
| pslld xmm9,7 |
| psrld xmm7,25 |
| pxor xmm7,xmm9 |
| DB 102,15,58,15,255,4 |
| DB 102,69,15,58,15,219,8 |
| DB 102,69,15,58,15,255,12 |
| movdqa xmm9,XMMWORD[((160+80))+rbp] |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| movdqa XMMWORD[(160+80)+rbp],xmm11 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol16] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm11,xmm4 |
| pslld xmm11,12 |
| psrld xmm4,20 |
| pxor xmm4,xmm11 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol8] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm11,xmm4 |
| pslld xmm11,7 |
| psrld xmm4,25 |
| pxor xmm4,xmm11 |
| DB 102,15,58,15,228,12 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,4 |
| paddd xmm1,xmm5 |
| pxor xmm13,xmm1 |
| pshufb xmm13,XMMWORD[$L$rol16] |
| paddd xmm9,xmm13 |
| pxor xmm5,xmm9 |
| movdqa xmm11,xmm5 |
| pslld xmm11,12 |
| psrld xmm5,20 |
| pxor xmm5,xmm11 |
| paddd xmm1,xmm5 |
| pxor xmm13,xmm1 |
| pshufb xmm13,XMMWORD[$L$rol8] |
| paddd xmm9,xmm13 |
| pxor xmm5,xmm9 |
| movdqa xmm11,xmm5 |
| pslld xmm11,7 |
| psrld xmm5,25 |
| pxor xmm5,xmm11 |
| DB 102,15,58,15,237,12 |
| DB 102,69,15,58,15,201,8 |
| DB 102,69,15,58,15,237,4 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| paddd xmm2,xmm6 |
| pxor xmm14,xmm2 |
| pshufb xmm14,XMMWORD[$L$rol16] |
| paddd xmm10,xmm14 |
| pxor xmm6,xmm10 |
| movdqa xmm11,xmm6 |
| pslld xmm11,12 |
| psrld xmm6,20 |
| pxor xmm6,xmm11 |
| paddd xmm2,xmm6 |
| pxor xmm14,xmm2 |
| pshufb xmm14,XMMWORD[$L$rol8] |
| paddd xmm10,xmm14 |
| pxor xmm6,xmm10 |
| movdqa xmm11,xmm6 |
| pslld xmm11,7 |
| psrld xmm6,25 |
| pxor xmm6,xmm11 |
| DB 102,15,58,15,246,12 |
| DB 102,69,15,58,15,210,8 |
| DB 102,69,15,58,15,246,4 |
| movdqa xmm11,XMMWORD[((160+80))+rbp] |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| movdqa XMMWORD[(160+80)+rbp],xmm9 |
| paddd xmm3,xmm7 |
| pxor xmm15,xmm3 |
| pshufb xmm15,XMMWORD[$L$rol16] |
| paddd xmm11,xmm15 |
| pxor xmm7,xmm11 |
| movdqa xmm9,xmm7 |
| pslld xmm9,12 |
| psrld xmm7,20 |
| pxor xmm7,xmm9 |
| paddd xmm3,xmm7 |
| pxor xmm15,xmm3 |
| pshufb xmm15,XMMWORD[$L$rol8] |
| paddd xmm11,xmm15 |
| pxor xmm7,xmm11 |
| movdqa xmm9,xmm7 |
| pslld xmm9,7 |
| psrld xmm7,25 |
| pxor xmm7,xmm9 |
| DB 102,15,58,15,255,12 |
| DB 102,69,15,58,15,219,8 |
| DB 102,69,15,58,15,255,4 |
| movdqa xmm9,XMMWORD[((160+80))+rbp] |
| |
| add r8,16 |
| cmp r8,10*16 |
| jb NEAR $L$open_sse_tail_256_rounds_and_x1hash |
| |
| ; rcx = rbx rounded down to a multiple of 16: upper bound for the hash loop. |
| ; r8 arrives here as 10*16, the value at which the preceding loop exits. |
| mov rcx,rbx |
| and rcx,-16 |
| ; Hash-only loop (no ChaCha work). The bound is tested at the bottom, so the |
| ; body always runs at least once. |
| $L$open_sse_tail_256_hash: |
| ; acc (r10:r11:r12) += 16 input bytes at rsi+r8, plus 1 in the third limb (2^128). |
| add r10,QWORD[((0+0))+r8*1+rsi] |
| adc r11,QWORD[((8+0))+r8*1+rsi] |
| adc r12,1 |
| ; acc *= the 128-bit multiplier at [rbp+160] (low qword r0, high qword r1). |
| ; Product limbs are collected in r13 (low), r14, r15, r9 (high). |
| ; r0 * acc: |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| ; r1 * acc: |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| ; Partial reduction using 2^130 = 5 (mod 2^130-5): keep the low 130 bits and |
| ; add h*4 (r15 & -4, r9) plus h (r9:r15 >> 2), where h is the part above bit 130. |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| add r8,16 |
| cmp r8,rcx |
| jb NEAR $L$open_sse_tail_256_hash |
| ; Feed-forward: add the initial state back into each of the four blocks |
| ; (constants, rows at 160+48 / 160+64, and the per-block D row saved earlier). |
| paddd xmm3,XMMWORD[$L$chacha20_consts] |
| paddd xmm7,XMMWORD[((160+48))+rbp] |
| paddd xmm11,XMMWORD[((160+64))+rbp] |
| paddd xmm15,XMMWORD[((160+144))+rbp] |
| paddd xmm2,XMMWORD[$L$chacha20_consts] |
| paddd xmm6,XMMWORD[((160+48))+rbp] |
| paddd xmm10,XMMWORD[((160+64))+rbp] |
| paddd xmm14,XMMWORD[((160+128))+rbp] |
| paddd xmm1,XMMWORD[$L$chacha20_consts] |
| paddd xmm5,XMMWORD[((160+48))+rbp] |
| paddd xmm9,XMMWORD[((160+64))+rbp] |
| paddd xmm13,XMMWORD[((160+112))+rbp] |
| paddd xmm0,XMMWORD[$L$chacha20_consts] |
| paddd xmm4,XMMWORD[((160+48))+rbp] |
| paddd xmm8,XMMWORD[((160+64))+rbp] |
| paddd xmm12,XMMWORD[((160+96))+rbp] |
| ; Bytes 0..63: XOR input with block 3 (xmm3,7,11,15). All sixteen xmm registers |
| ; are live, so xmm12 is spilled to [rbp+160+80] and used as the scratch register. |
| movdqa XMMWORD[(160+80)+rbp],xmm12 |
| movdqu xmm12,XMMWORD[((0 + 0))+rsi] |
| pxor xmm12,xmm3 |
| movdqu XMMWORD[(0 + 0)+rdi],xmm12 |
| movdqu xmm12,XMMWORD[((16 + 0))+rsi] |
| pxor xmm12,xmm7 |
| movdqu XMMWORD[(16 + 0)+rdi],xmm12 |
| movdqu xmm12,XMMWORD[((32 + 0))+rsi] |
| pxor xmm12,xmm11 |
| movdqu XMMWORD[(32 + 0)+rdi],xmm12 |
| movdqu xmm12,XMMWORD[((48 + 0))+rsi] |
| pxor xmm12,xmm15 |
| movdqu XMMWORD[(48 + 0)+rdi],xmm12 |
| ; Bytes 64..127: XOR with block 2 (xmm2,6,10,14); xmm3/7/11/15 are free now. |
| movdqu xmm3,XMMWORD[((0 + 64))+rsi] |
| movdqu xmm7,XMMWORD[((16 + 64))+rsi] |
| movdqu xmm11,XMMWORD[((32 + 64))+rsi] |
| movdqu xmm15,XMMWORD[((48 + 64))+rsi] |
| pxor xmm2,xmm3 |
| pxor xmm6,xmm7 |
| pxor xmm10,xmm11 |
| pxor xmm15,xmm14 |
| movdqu XMMWORD[(0 + 64)+rdi],xmm2 |
| movdqu XMMWORD[(16 + 64)+rdi],xmm6 |
| movdqu XMMWORD[(32 + 64)+rdi],xmm10 |
| movdqu XMMWORD[(48 + 64)+rdi],xmm15 |
| ; Bytes 128..191: XOR with block 1 (xmm1,5,9,13). |
| movdqu xmm3,XMMWORD[((0 + 128))+rsi] |
| movdqu xmm7,XMMWORD[((16 + 128))+rsi] |
| movdqu xmm11,XMMWORD[((32 + 128))+rsi] |
| movdqu xmm15,XMMWORD[((48 + 128))+rsi] |
| pxor xmm1,xmm3 |
| pxor xmm5,xmm7 |
| pxor xmm9,xmm11 |
| pxor xmm15,xmm13 |
| movdqu XMMWORD[(0 + 128)+rdi],xmm1 |
| movdqu XMMWORD[(16 + 128)+rdi],xmm5 |
| movdqu XMMWORD[(32 + 128)+rdi],xmm9 |
| movdqu XMMWORD[(48 + 128)+rdi],xmm15 |
| |
| ; Restore block 0's D row; block 0 (xmm0,4,8,12) is left as keystream for the |
| ; 16-byte-at-a-time loop that follows. 192 bytes have been consumed. |
| movdqa xmm12,XMMWORD[((160+80))+rbp] |
| sub rbx,12*16 |
| lea rsi,[192+rsi] |
| lea rdi,[192+rdi] |
| |
| |
| ; Consume up to one 64-byte keystream block (xmm0, xmm4, xmm8, xmm12) in |
| ; 16-byte steps. Each pass XORs 16 input bytes with xmm0 and then shifts the |
| ; remaining rows down (xmm0 <- xmm4 <- xmm8 <- xmm12). |
| ; Exits to $L$open_sse_tail_16_init once fewer than 16 bytes remain in rbx; |
| ; xmm0 then holds the next unused 16 keystream bytes. |
| $L$open_sse_tail_64_dec_loop: |
| cmp rbx,16 |
| jb NEAR $L$open_sse_tail_16_init |
| sub rbx,16 |
| movdqu xmm3,XMMWORD[rsi] |
| pxor xmm0,xmm3 |
| movdqu XMMWORD[rdi],xmm0 |
| lea rsi,[16+rsi] |
| lea rdi,[16+rdi] |
| movdqa xmm0,xmm4 |
| movdqa xmm4,xmm8 |
| movdqa xmm8,xmm12 |
| jmp NEAR $L$open_sse_tail_64_dec_loop |
| ; Entry from the 64-byte loop: $L$open_sse_tail_16 expects its keystream in |
| ; xmm1, so move it over from xmm0. ($L$open_sse_128_xor_hash jumps straight to |
| ; $L$open_sse_tail_16 with xmm1 already set.) |
| $L$open_sse_tail_16_init: |
| movdqa xmm1,xmm0 |
| |
| |
| ; Handle the final rbx (< 16) input bytes; nothing to do when rbx == 0. |
| $L$open_sse_tail_16: |
| test rbx,rbx |
| jz NEAR $L$open_sse_finalize |
| |
| |
| |
| ; Assemble the rbx trailing bytes into xmm3 one byte at a time, walking |
| ; backwards from the last byte, so no byte past the end of the input is read. |
| ; Bytes above position rbx-1 stay zero. |
| pxor xmm3,xmm3 |
| lea rsi,[((-1))+rbx*1+rsi] |
| mov r8,rbx |
| $L$open_sse_tail_16_compose: |
| pslldq xmm3,1 |
| pinsrb xmm3,BYTE[rsi],0 |
| sub rsi,1 |
| sub r8,1 |
| jnz NEAR $L$open_sse_tail_16_compose |
| |
| ; r13:r14 = the zero-padded input block, captured before it is XORed with the |
| ; keystream. The DB line encodes "movq r13,xmm3". |
| DB 102,73,15,126,221 |
| pextrq r14,xmm3,1 |
| |
| pxor xmm3,xmm1 |
| |
| |
| ; Write exactly rbx output bytes, lowest byte first. XMMWORD is %define'd to |
| ; nothing in this file, so the pextrb below is a plain one-byte store. |
| $L$open_sse_tail_16_extract: |
| pextrb XMMWORD[rdi],xmm3,0 |
| psrldq xmm3,1 |
| add rdi,1 |
| sub rbx,1 |
| jne NEAR $L$open_sse_tail_16_extract |
| |
| ; Absorb the padded block: acc (r10:r11:r12) += r13:r14 + 2^128. |
| add r10,r13 |
| adc r11,r14 |
| adc r12,1 |
| ; acc *= multiplier at [rbp+160]; product limbs in r13, r14, r15, r9. |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| ; Partial reduction mod 2^130-5: fold the bits above 2^130 back in as 5*h. |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| |
| ; Finish the MAC, write the 16-byte tag, restore callee-saved state and return. |
| $L$open_sse_finalize: |
| ; Absorb one last 16-byte block taken from [rbp+160+32]. |
| ; NOTE(review): the seal prologue in this file stores two 64-bit lengths in this |
| ; slot; the matching store for the open path is outside this chunk - confirm. |
| add r10,QWORD[((0+160+32))+rbp] |
| adc r11,QWORD[((8+160+32))+rbp] |
| adc r12,1 |
| ; acc *= multiplier at [rbp+160]; product limbs in r13, r14, r15, r9. |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| ; Partial reduction mod 2^130-5: fold the bits above 2^130 back in as 5*h. |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| |
| ; Final reduction: compute acc - (2^130-5), whose limbs are (-5, -1, 3). |
| ; A borrow means acc was already below the modulus, so the saved copy is put |
| ; back with cmovc (no data-dependent branch). |
| mov r13,r10 |
| mov r14,r11 |
| mov r15,r12 |
| sub r10,-5 |
| sbb r11,-1 |
| sbb r12,3 |
| cmovc r10,r13 |
| cmovc r11,r14 |
| cmovc r12,r15 |
| |
| ; tag = (acc + 128-bit value at [rbp+160+16]) mod 2^128; the carry out of r11 |
| ; is dropped. |
| add r10,QWORD[((0+160+16))+rbp] |
| adc r11,QWORD[((8+160+16))+rbp] |
| |
| ; Restore xmm6-xmm15 (callee-saved under the Microsoft x64 ABI) from the first |
| ; 160 bytes of the frame at rbp. |
| movaps xmm6,XMMWORD[((0+0))+rbp] |
| movaps xmm7,XMMWORD[((16+0))+rbp] |
| movaps xmm8,XMMWORD[((32+0))+rbp] |
| movaps xmm9,XMMWORD[((48+0))+rbp] |
| movaps xmm10,XMMWORD[((64+0))+rbp] |
| movaps xmm11,XMMWORD[((80+0))+rbp] |
| movaps xmm12,XMMWORD[((96+0))+rbp] |
| movaps xmm13,XMMWORD[((112+0))+rbp] |
| movaps xmm14,XMMWORD[((128+0))+rbp] |
| movaps xmm15,XMMWORD[((144+0))+rbp] |
| |
| |
| ; Release the local frame (must match the prologue's "sub rsp"). |
| add rsp,288 + 160 + 32 |
| |
| |
| ; r9 = pointer that was pushed last by the prologue; the tag is written there. |
| pop r9 |
| |
| mov QWORD[r9],r10 |
| mov QWORD[8+r9],r11 |
| pop r15 |
| |
| pop r14 |
| |
| pop r13 |
| |
| pop r12 |
| |
| pop rbx |
| |
| pop rbp |
| |
| ; rsp is back at the return address; reload rdi/rsi from the caller-provided |
| ; home space where the Win64 prologue parked them. |
| mov rdi,QWORD[8+rsp] ;WIN64 epilogue |
| mov rsi,QWORD[16+rsp] |
| ret |
| |
| ; $L$open_sse_128: short-input path of the open routine. Builds three ChaCha20 |
| ; block states from the 48 bytes at r9: |
| ;   block 0 = xmm0,4,8,12   block 1 = xmm1,5,9,13   block 2 = xmm2,6,10,14 |
| ; Block 0 uses the D row exactly as loaded; blocks 1 and 2 use it with dword 0 |
| ; incremented by 1 and 2 ($L$sse_inc). |
| $L$open_sse_128: |
| |
| movdqu xmm0,XMMWORD[$L$chacha20_consts] |
| movdqa xmm1,xmm0 |
| movdqa xmm2,xmm0 |
| movdqu xmm4,XMMWORD[r9] |
| movdqa xmm5,xmm4 |
| movdqa xmm6,xmm4 |
| movdqu xmm8,XMMWORD[16+r9] |
| movdqa xmm9,xmm8 |
| movdqa xmm10,xmm8 |
| movdqu xmm12,XMMWORD[32+r9] |
| movdqa xmm13,xmm12 |
| paddd xmm13,XMMWORD[$L$sse_inc] |
| movdqa xmm14,xmm13 |
| paddd xmm14,XMMWORD[$L$sse_inc] |
| ; Keep copies of the initial B row, C row and block-1 D row in xmm7/xmm11/xmm15 |
| ; for the feed-forward after the rounds (xmm3 is the scratch register there). |
| movdqa xmm7,xmm4 |
| movdqa xmm11,xmm8 |
| movdqa xmm15,xmm13 |
| ; r10 = number of double rounds to run. |
| mov r10,10 |
| |
| ; One iteration = one ChaCha20 double round on three blocks, using xmm3 as |
| ; scratch. Rotations by 16 and 8 use pshufb with $L$rol16 / $L$rol8; rotations |
| ; by 12 and 7 use a pslld/psrld/pxor triple. The DB lines encode |
| ; "palignr xmmN,xmmN,imm", which rotates rows B/C/D by 4/8/12 bytes to move |
| ; from columns to diagonals (and by 12/8/4 to move back). |
| $L$open_sse_128_rounds: |
| ; --- column round, block 0 (xmm0,4,8,12) --- |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol16] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,12 |
| psrld xmm4,20 |
| pxor xmm4,xmm3 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol8] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,7 |
| psrld xmm4,25 |
| pxor xmm4,xmm3 |
| ; palignr xmm4,xmm4,4 / xmm8,xmm8,8 / xmm12,xmm12,12 |
| DB 102,15,58,15,228,4 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,12 |
| ; --- column round, block 1 (xmm1,5,9,13) --- |
| paddd xmm1,xmm5 |
| pxor xmm13,xmm1 |
| pshufb xmm13,XMMWORD[$L$rol16] |
| paddd xmm9,xmm13 |
| pxor xmm5,xmm9 |
| movdqa xmm3,xmm5 |
| pslld xmm3,12 |
| psrld xmm5,20 |
| pxor xmm5,xmm3 |
| paddd xmm1,xmm5 |
| pxor xmm13,xmm1 |
| pshufb xmm13,XMMWORD[$L$rol8] |
| paddd xmm9,xmm13 |
| pxor xmm5,xmm9 |
| movdqa xmm3,xmm5 |
| pslld xmm3,7 |
| psrld xmm5,25 |
| pxor xmm5,xmm3 |
| ; palignr xmm5,xmm5,4 / xmm9,xmm9,8 / xmm13,xmm13,12 |
| DB 102,15,58,15,237,4 |
| DB 102,69,15,58,15,201,8 |
| DB 102,69,15,58,15,237,12 |
| ; --- column round, block 2 (xmm2,6,10,14) --- |
| paddd xmm2,xmm6 |
| pxor xmm14,xmm2 |
| pshufb xmm14,XMMWORD[$L$rol16] |
| paddd xmm10,xmm14 |
| pxor xmm6,xmm10 |
| movdqa xmm3,xmm6 |
| pslld xmm3,12 |
| psrld xmm6,20 |
| pxor xmm6,xmm3 |
| paddd xmm2,xmm6 |
| pxor xmm14,xmm2 |
| pshufb xmm14,XMMWORD[$L$rol8] |
| paddd xmm10,xmm14 |
| pxor xmm6,xmm10 |
| movdqa xmm3,xmm6 |
| pslld xmm3,7 |
| psrld xmm6,25 |
| pxor xmm6,xmm3 |
| ; palignr xmm6,xmm6,4 / xmm10,xmm10,8 / xmm14,xmm14,12 |
| DB 102,15,58,15,246,4 |
| DB 102,69,15,58,15,210,8 |
| DB 102,69,15,58,15,246,12 |
| ; --- diagonal round, block 0 --- |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol16] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,12 |
| psrld xmm4,20 |
| pxor xmm4,xmm3 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol8] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,7 |
| psrld xmm4,25 |
| pxor xmm4,xmm3 |
| ; palignr xmm4,xmm4,12 / xmm8,xmm8,8 / xmm12,xmm12,4 (undo the diagonal shift) |
| DB 102,15,58,15,228,12 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,4 |
| ; --- diagonal round, block 1 --- |
| paddd xmm1,xmm5 |
| pxor xmm13,xmm1 |
| pshufb xmm13,XMMWORD[$L$rol16] |
| paddd xmm9,xmm13 |
| pxor xmm5,xmm9 |
| movdqa xmm3,xmm5 |
| pslld xmm3,12 |
| psrld xmm5,20 |
| pxor xmm5,xmm3 |
| paddd xmm1,xmm5 |
| pxor xmm13,xmm1 |
| pshufb xmm13,XMMWORD[$L$rol8] |
| paddd xmm9,xmm13 |
| pxor xmm5,xmm9 |
| movdqa xmm3,xmm5 |
| pslld xmm3,7 |
| psrld xmm5,25 |
| pxor xmm5,xmm3 |
| ; palignr xmm5,xmm5,12 / xmm9,xmm9,8 / xmm13,xmm13,4 |
| DB 102,15,58,15,237,12 |
| DB 102,69,15,58,15,201,8 |
| DB 102,69,15,58,15,237,4 |
| ; --- diagonal round, block 2 --- |
| paddd xmm2,xmm6 |
| pxor xmm14,xmm2 |
| pshufb xmm14,XMMWORD[$L$rol16] |
| paddd xmm10,xmm14 |
| pxor xmm6,xmm10 |
| movdqa xmm3,xmm6 |
| pslld xmm3,12 |
| psrld xmm6,20 |
| pxor xmm6,xmm3 |
| paddd xmm2,xmm6 |
| pxor xmm14,xmm2 |
| pshufb xmm14,XMMWORD[$L$rol8] |
| paddd xmm10,xmm14 |
| pxor xmm6,xmm10 |
| movdqa xmm3,xmm6 |
| pslld xmm3,7 |
| psrld xmm6,25 |
| pxor xmm6,xmm3 |
| ; palignr xmm6,xmm6,12 / xmm10,xmm10,8 / xmm14,xmm14,4 |
| DB 102,15,58,15,246,12 |
| DB 102,69,15,58,15,210,8 |
| DB 102,69,15,58,15,246,4 |
| |
| dec r10 |
| jnz NEAR $L$open_sse_128_rounds |
| ; Feed-forward using the copies kept in xmm7 (B), xmm11 (C) and xmm15 (D of |
| ; block 1). Block 0 only gets rows A and B completed (xmm0, xmm4) because only |
| ; those 32 bytes are used below; xmm8 and xmm12 are left as they are. |
| paddd xmm0,XMMWORD[$L$chacha20_consts] |
| paddd xmm1,XMMWORD[$L$chacha20_consts] |
| paddd xmm2,XMMWORD[$L$chacha20_consts] |
| paddd xmm4,xmm7 |
| paddd xmm5,xmm7 |
| paddd xmm6,xmm7 |
| paddd xmm9,xmm11 |
| paddd xmm10,xmm11 |
| paddd xmm13,xmm15 |
| ; xmm15 += 1 turns block 1's initial D row into block 2's. |
| paddd xmm15,XMMWORD[$L$sse_inc] |
| paddd xmm14,xmm15 |
| |
| ; One-time MAC key from block 0: row A masked with the first 16 bytes of |
| ; $L$clamp goes to [rbp+160+0], row B goes to [rbp+160+16]. |
| pand xmm0,XMMWORD[$L$clamp] |
| movdqa XMMWORD[(160+0)+rbp],xmm0 |
| movdqa XMMWORD[(160+16)+rbp],xmm4 |
| |
| ; NOTE(review): "mov r8,r8" is a no-op as assembled; it looks like the |
| ; generator's "copy argument into the helper's input register" step where both |
| ; names map to r8. poly_hash_ad_internal is defined outside this chunk. |
| mov r8,r8 |
| call poly_hash_ad_internal |
| ; Process the input 16 bytes at a time: hash the input block first, then XOR |
| ; it with the next 16 keystream bytes (always taken from xmm1). |
| ; Leaves via $L$open_sse_tail_16 when fewer than 16 bytes remain in rbx. |
| $L$open_sse_128_xor_hash: |
| cmp rbx,16 |
| jb NEAR $L$open_sse_tail_16 |
| sub rbx,16 |
| ; acc (r10:r11:r12) += 16 input bytes at rsi, plus 1 in the third limb (2^128). |
| add r10,QWORD[((0+0))+rsi] |
| adc r11,QWORD[((8+0))+rsi] |
| adc r12,1 |
| |
| |
| movdqu xmm3,XMMWORD[rsi] |
| pxor xmm1,xmm3 |
| movdqu XMMWORD[rdi],xmm1 |
| lea rsi,[16+rsi] |
| lea rdi,[16+rdi] |
| ; acc *= multiplier at [rbp+160]; product limbs in r13, r14, r15, r9. |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| ; Partial reduction mod 2^130-5: fold the bits above 2^130 back in as 5*h. |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| |
| ; Advance the keystream queue by one row: |
| ; xmm1 <- xmm5 <- xmm9 <- xmm13 <- xmm2 <- xmm6 <- xmm10 <- xmm14 |
| ; i.e. block 1 rows A..D followed by block 2 rows A..D. |
| movdqa xmm1,xmm5 |
| movdqa xmm5,xmm9 |
| movdqa xmm9,xmm13 |
| movdqa xmm13,xmm2 |
| movdqa xmm2,xmm6 |
| movdqa xmm6,xmm10 |
| movdqa xmm10,xmm14 |
| jmp NEAR $L$open_sse_128_xor_hash |
| $L$SEH_end_chacha20_poly1305_open_nohw: |
| |
| |
| |
| |
| |
| |
| |
| |
| ; chacha20_poly1305_seal_nohw - SSE seal routine, Win64 entry point. |
| ; The prologue remaps the Microsoft x64 argument registers onto the register |
| ; assignment used by the body: |
| ;   rdi <- rcx (arg 1)   rsi <- rdx (arg 2)   rdx <- r8 (arg 3) |
| ;   rcx <- r9  (arg 4)   r8  <- [rsp+40] (arg 5)   r9 <- [rsp+48] (arg 6) |
| ; As used in the visible code: rdi = output pointer, rsi = input pointer, |
| ; rdx = input length in bytes, r9 = pointer to a block holding the 48 bytes of |
| ; ChaCha20 state rows B/C/D and a 64-bit length at offset 56. |
| ; NOTE(review): r8 is presumably the AD length and rcx the AD pointer consumed |
| ; by poly_hash_ad_internal - confirm against the C prototype. |
| global chacha20_poly1305_seal_nohw |
| |
| ALIGN 64 |
| chacha20_poly1305_seal_nohw: |
| ; rdi/rsi are callee-saved on Win64; park them in the caller's home space. |
| mov QWORD[8+rsp],rdi ;WIN64 prologue |
| mov QWORD[16+rsp],rsi |
| mov rax,rsp |
| $L$SEH_begin_chacha20_poly1305_seal_nohw: |
| mov rdi,rcx |
| mov rsi,rdx |
| mov rdx,r8 |
| mov rcx,r9 |
| mov r8,QWORD[40+rsp] |
| mov r9,QWORD[48+rsp] |
| |
| |
| |
| ; _CET_ENDBR is %define'd to nothing for this output format. |
| _CET_ENDBR |
| push rbp |
| |
| push rbx |
| |
| push r12 |
| |
| push r13 |
| |
| push r14 |
| |
| push r15 |
| |
| |
| |
| ; Keep r9 on the stack: it is reused as a scratch register by the MAC code. |
| push r9 |
| |
| sub rsp,288 + 160 + 32 |
| |
| ; rbp = 32-byte-aligned frame base inside the area just reserved. |
| ;   [rbp+0   .. +159]  saved xmm6-xmm15 |
| ;   [rbp+160 .. +175]  clamped MAC multiplier     [rbp+176..+191] MAC final addend |
| ;   [rbp+192 .. +207]  two 64-bit lengths         [rbp+208/+224]  state rows B / C |
| ;   [rbp+240]          16-byte spill slot         [rbp+256..+319] four D rows |
| lea rbp,[32+rsp] |
| and rbp,-32 |
| |
| ; xmm6-xmm15 are callee-saved under the Microsoft x64 ABI. |
| movaps XMMWORD[(0+0)+rbp],xmm6 |
| movaps XMMWORD[(16+0)+rbp],xmm7 |
| movaps XMMWORD[(32+0)+rbp],xmm8 |
| movaps XMMWORD[(48+0)+rbp],xmm9 |
| movaps XMMWORD[(64+0)+rbp],xmm10 |
| movaps XMMWORD[(80+0)+rbp],xmm11 |
| movaps XMMWORD[(96+0)+rbp],xmm12 |
| movaps XMMWORD[(112+0)+rbp],xmm13 |
| movaps XMMWORD[(128+0)+rbp],xmm14 |
| movaps XMMWORD[(144+0)+rbp],xmm15 |
| |
| ; Length block: first qword = r8, second qword = rdx + the qword at [r9+56]. |
| ; rbx then becomes the running count of input bytes still to process. |
| mov rbx,QWORD[56+r9] |
| add rbx,rdx |
| mov QWORD[((0+160+32))+rbp],r8 |
| mov QWORD[((8+160+32))+rbp],rbx |
| mov rbx,rdx |
| |
| ; Inputs of at most 128 bytes take the dedicated short path. |
| cmp rbx,128 |
| jbe NEAR $L$seal_sse_128 |
| |
| ; Long path: build four block states (block i = xmm(i), xmm(4+i), xmm(8+i), |
| ; xmm(12+i)) from the constants and the 48 bytes at r9. |
| movdqa xmm0,XMMWORD[$L$chacha20_consts] |
| movdqu xmm4,XMMWORD[r9] |
| movdqu xmm8,XMMWORD[16+r9] |
| movdqu xmm12,XMMWORD[32+r9] |
| |
| movdqa xmm1,xmm0 |
| movdqa xmm2,xmm0 |
| movdqa xmm3,xmm0 |
| movdqa xmm5,xmm4 |
| movdqa xmm6,xmm4 |
| movdqa xmm7,xmm4 |
| movdqa xmm9,xmm8 |
| movdqa xmm10,xmm8 |
| movdqa xmm11,xmm8 |
| ; D rows: xmm15 = row as loaded, xmm14 = +1, xmm13 = +2, xmm12 = +3 |
| ; (each step adds $L$sse_inc, i.e. 1 to dword 0). |
| movdqa xmm15,xmm12 |
| paddd xmm12,XMMWORD[$L$sse_inc] |
| movdqa xmm14,xmm12 |
| paddd xmm12,XMMWORD[$L$sse_inc] |
| movdqa xmm13,xmm12 |
| paddd xmm12,XMMWORD[$L$sse_inc] |
| |
| ; Save rows B, C and the four D rows for the feed-forward after the rounds. |
| movdqa XMMWORD[(160+48)+rbp],xmm4 |
| movdqa XMMWORD[(160+64)+rbp],xmm8 |
| movdqa XMMWORD[(160+96)+rbp],xmm12 |
| movdqa XMMWORD[(160+112)+rbp],xmm13 |
| movdqa XMMWORD[(160+128)+rbp],xmm14 |
| movdqa XMMWORD[(160+144)+rbp],xmm15 |
| ; r10 = number of double rounds for the loop that follows. |
| mov r10,10 |
| $L$seal_sse_init_rounds: |
| movdqa XMMWORD[(160+80)+rbp],xmm8 |
| movdqa xmm8,XMMWORD[$L$rol16] |
| paddd xmm3,xmm7 |
| paddd xmm2,xmm6 |
| paddd xmm1,xmm5 |
| paddd xmm0,xmm4 |
| pxor xmm15,xmm3 |
| pxor xmm14,xmm2 |
| pxor xmm13,xmm1 |
| pxor xmm12,xmm0 |
| DB 102,69,15,56,0,248 |
| DB 102,69,15,56,0,240 |
| DB 102,69,15,56,0,232 |
| DB 102,69,15,56,0,224 |
| movdqa xmm8,XMMWORD[((160+80))+rbp] |
| paddd xmm11,xmm15 |
| paddd xmm10,xmm14 |
| paddd xmm9,xmm13 |
| paddd xmm8,xmm12 |
| pxor xmm7,xmm11 |
| pxor xmm6,xmm10 |
| pxor xmm5,xmm9 |
| pxor xmm4,xmm8 |
| movdqa XMMWORD[(160+80)+rbp],xmm8 |
| movdqa xmm8,xmm7 |
| psrld xmm8,20 |
| pslld xmm7,32-20 |
| pxor xmm7,xmm8 |
| movdqa xmm8,xmm6 |
| psrld xmm8,20 |
| pslld xmm6,32-20 |
| pxor xmm6,xmm8 |
| movdqa xmm8,xmm5 |
| psrld xmm8,20 |
| pslld xmm5,32-20 |
| pxor xmm5,xmm8 |
| movdqa xmm8,xmm4 |
| psrld xmm8,20 |
| pslld xmm4,32-20 |
| pxor xmm4,xmm8 |
| movdqa xmm8,XMMWORD[$L$rol8] |
| paddd xmm3,xmm7 |
| paddd xmm2,xmm6 |
| paddd xmm1,xmm5 |
| paddd xmm0,xmm4 |
| pxor xmm15,xmm3 |
| pxor xmm14,xmm2 |
| pxor xmm13,xmm1 |
| pxor xmm12,xmm0 |
| DB 102,69,15,56,0,248 |
| DB 102,69,15,56,0,240 |
| DB 102,69,15,56,0,232 |
| DB 102,69,15,56,0,224 |
| movdqa xmm8,XMMWORD[((160+80))+rbp] |
| paddd xmm11,xmm15 |
| paddd xmm10,xmm14 |
| paddd xmm9,xmm13 |
| paddd xmm8,xmm12 |
| pxor xmm7,xmm11 |
| pxor xmm6,xmm10 |
| pxor xmm5,xmm9 |
| pxor xmm4,xmm8 |
| movdqa XMMWORD[(160+80)+rbp],xmm8 |
| movdqa xmm8,xmm7 |
| psrld xmm8,25 |
| pslld xmm7,32-25 |
| pxor xmm7,xmm8 |
| movdqa xmm8,xmm6 |
| psrld xmm8,25 |
| pslld xmm6,32-25 |
| pxor xmm6,xmm8 |
| movdqa xmm8,xmm5 |
| psrld xmm8,25 |
| pslld xmm5,32-25 |
| pxor xmm5,xmm8 |
| movdqa xmm8,xmm4 |
| psrld xmm8,25 |
| pslld xmm4,32-25 |
| pxor xmm4,xmm8 |
| movdqa xmm8,XMMWORD[((160+80))+rbp] |
| DB 102,15,58,15,255,4 |
| DB 102,69,15,58,15,219,8 |
| DB 102,69,15,58,15,255,12 |
| DB 102,15,58,15,246,4 |
| DB 102,69,15,58,15,210,8 |
| DB 102,69,15,58,15,246,12 |
| DB 102,15,58,15,237,4 |
| DB 102,69,15,58,15,201,8 |
| DB 102,69,15,58,15,237,12 |
| DB 102,15,58,15,228,4 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,12 |
| movdqa XMMWORD[(160+80)+rbp],xmm8 |
| movdqa xmm8,XMMWORD[$L$rol16] |
| paddd xmm3,xmm7 |
| paddd xmm2,xmm6 |
| paddd xmm1,xmm5 |
| paddd xmm0,xmm4 |
| pxor xmm15,xmm3 |
| pxor xmm14,xmm2 |
| pxor xmm13,xmm1 |
| pxor xmm12,xmm0 |
| DB 102,69,15,56,0,248 |
| DB 102,69,15,56,0,240 |
| DB 102,69,15,56,0,232 |
| DB 102,69,15,56,0,224 |
| movdqa xmm8,XMMWORD[((160+80))+rbp] |
| paddd xmm11,xmm15 |
| paddd xmm10,xmm14 |
| paddd xmm9,xmm13 |
| paddd xmm8,xmm12 |
| pxor xmm7,xmm11 |
| pxor xmm6,xmm10 |
| pxor xmm5,xmm9 |
| pxor xmm4,xmm8 |
| movdqa XMMWORD[(160+80)+rbp],xmm8 |
| movdqa xmm8,xmm7 |
| psrld xmm8,20 |
| pslld xmm7,32-20 |
| pxor xmm7,xmm8 |
| movdqa xmm8,xmm6 |
| psrld xmm8,20 |
| pslld xmm6,32-20 |
| pxor xmm6,xmm8 |
| movdqa xmm8,xmm5 |
| psrld xmm8,20 |
| pslld xmm5,32-20 |
| pxor xmm5,xmm8 |
| movdqa xmm8,xmm4 |
| psrld xmm8,20 |
| pslld xmm4,32-20 |
| pxor xmm4,xmm8 |
| movdqa xmm8,XMMWORD[$L$rol8] |
| paddd xmm3,xmm7 |
| paddd xmm2,xmm6 |
| paddd xmm1,xmm5 |
| paddd xmm0,xmm4 |
| pxor xmm15,xmm3 |
| pxor xmm14,xmm2 |
| pxor xmm13,xmm1 |
| pxor xmm12,xmm0 |
| DB 102,69,15,56,0,248 |
| DB 102,69,15,56,0,240 |
| DB 102,69,15,56,0,232 |
| DB 102,69,15,56,0,224 |
| movdqa xmm8,XMMWORD[((160+80))+rbp] |
| paddd xmm11,xmm15 |
| paddd xmm10,xmm14 |
| paddd xmm9,xmm13 |
| paddd xmm8,xmm12 |
| pxor xmm7,xmm11 |
| pxor xmm6,xmm10 |
| pxor xmm5,xmm9 |
| pxor xmm4,xmm8 |
| movdqa XMMWORD[(160+80)+rbp],xmm8 |
| movdqa xmm8,xmm7 |
| psrld xmm8,25 |
| pslld xmm7,32-25 |
| pxor xmm7,xmm8 |
| movdqa xmm8,xmm6 |
| psrld xmm8,25 |
| pslld xmm6,32-25 |
| pxor xmm6,xmm8 |
| movdqa xmm8,xmm5 |
| psrld xmm8,25 |
| pslld xmm5,32-25 |
| pxor xmm5,xmm8 |
| movdqa xmm8,xmm4 |
| psrld xmm8,25 |
| pslld xmm4,32-25 |
| pxor xmm4,xmm8 |
| movdqa xmm8,XMMWORD[((160+80))+rbp] |
| DB 102,15,58,15,255,12 |
| DB 102,69,15,58,15,219,8 |
| DB 102,69,15,58,15,255,4 |
| DB 102,15,58,15,246,12 |
| DB 102,69,15,58,15,210,8 |
| DB 102,69,15,58,15,246,4 |
| DB 102,15,58,15,237,12 |
| DB 102,69,15,58,15,201,8 |
| DB 102,69,15,58,15,237,4 |
| DB 102,15,58,15,228,12 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,4 |
| |
| dec r10 |
| jnz NEAR $L$seal_sse_init_rounds |
| ; After the initial rounds: feed-forward. Add the initial state (constants, |
| ; rows B/C saved at 160+48 / 160+64, and each block's own D row) into all four |
| ; blocks. |
| paddd xmm3,XMMWORD[$L$chacha20_consts] |
| paddd xmm7,XMMWORD[((160+48))+rbp] |
| paddd xmm11,XMMWORD[((160+64))+rbp] |
| paddd xmm15,XMMWORD[((160+144))+rbp] |
| paddd xmm2,XMMWORD[$L$chacha20_consts] |
| paddd xmm6,XMMWORD[((160+48))+rbp] |
| paddd xmm10,XMMWORD[((160+64))+rbp] |
| paddd xmm14,XMMWORD[((160+128))+rbp] |
| paddd xmm1,XMMWORD[$L$chacha20_consts] |
| paddd xmm5,XMMWORD[((160+48))+rbp] |
| paddd xmm9,XMMWORD[((160+64))+rbp] |
| paddd xmm13,XMMWORD[((160+112))+rbp] |
| paddd xmm0,XMMWORD[$L$chacha20_consts] |
| paddd xmm4,XMMWORD[((160+48))+rbp] |
| paddd xmm8,XMMWORD[((160+64))+rbp] |
| paddd xmm12,XMMWORD[((160+96))+rbp] |
| |
| |
| ; Block 3 (xmm3,7,..., the one built from the D row as loaded) supplies the |
| ; one-time MAC key: row A masked with $L$clamp -> [rbp+160+0], row B -> |
| ; [rbp+160+16]. |
| pand xmm3,XMMWORD[$L$clamp] |
| movdqa XMMWORD[(160+0)+rbp],xmm3 |
| movdqa XMMWORD[(160+16)+rbp],xmm7 |
| |
| ; NOTE(review): "mov r8,r8" is a no-op as assembled; it looks like the |
| ; generator's "copy argument into the helper's input register" step where both |
| ; names map to r8. poly_hash_ad_internal is defined outside this chunk. |
| mov r8,r8 |
| call poly_hash_ad_internal |
| ; Bytes 0..63: input XOR block 2 (xmm2,6,10,14); xmm3/7/11/15 are scratch now. |
| movdqu xmm3,XMMWORD[((0 + 0))+rsi] |
| movdqu xmm7,XMMWORD[((16 + 0))+rsi] |
| movdqu xmm11,XMMWORD[((32 + 0))+rsi] |
| movdqu xmm15,XMMWORD[((48 + 0))+rsi] |
| pxor xmm2,xmm3 |
| pxor xmm6,xmm7 |
| pxor xmm10,xmm11 |
| pxor xmm15,xmm14 |
| movdqu XMMWORD[(0 + 0)+rdi],xmm2 |
| movdqu XMMWORD[(16 + 0)+rdi],xmm6 |
| movdqu XMMWORD[(32 + 0)+rdi],xmm10 |
| movdqu XMMWORD[(48 + 0)+rdi],xmm15 |
| ; Bytes 64..127: input XOR block 1 (xmm1,5,9,13). |
| movdqu xmm3,XMMWORD[((0 + 64))+rsi] |
| movdqu xmm7,XMMWORD[((16 + 64))+rsi] |
| movdqu xmm11,XMMWORD[((32 + 64))+rsi] |
| movdqu xmm15,XMMWORD[((48 + 64))+rsi] |
| pxor xmm1,xmm3 |
| pxor xmm5,xmm7 |
| pxor xmm9,xmm11 |
| pxor xmm15,xmm13 |
| movdqu XMMWORD[(0 + 64)+rdi],xmm1 |
| movdqu XMMWORD[(16 + 64)+rdi],xmm5 |
| movdqu XMMWORD[(32 + 64)+rdi],xmm9 |
| movdqu XMMWORD[(48 + 64)+rdi],xmm15 |
| |
| ; More than 192 input bytes: also use block 0 and enter the main loop. |
| ; Otherwise 128 bytes are done: rcx = 8*16 (the number of bytes just written at |
| ; rdi), rbx and rsi are advanced, rdi is left where it is, and control goes to |
| ; $L$seal_sse_128_tail_hash (defined outside this chunk). |
| cmp rbx,12*16 |
| ja NEAR $L$seal_sse_main_init |
| mov rcx,8*16 |
| sub rbx,8*16 |
| lea rsi,[128+rsi] |
| jmp NEAR $L$seal_sse_128_tail_hash |
| ; $L$seal_sse_main_init: bytes 128..191 of the first batch are XORed with |
| ; block 0 (xmm0,4,8,12), then the loop counters for the main loop are set up. |
| $L$seal_sse_main_init: |
| movdqu xmm3,XMMWORD[((0 + 128))+rsi] |
| movdqu xmm7,XMMWORD[((16 + 128))+rsi] |
| movdqu xmm11,XMMWORD[((32 + 128))+rsi] |
| movdqu xmm15,XMMWORD[((48 + 128))+rsi] |
| pxor xmm0,xmm3 |
| pxor xmm4,xmm7 |
| pxor xmm8,xmm11 |
| pxor xmm15,xmm12 |
| movdqu XMMWORD[(0 + 128)+rdi],xmm0 |
| movdqu XMMWORD[(16 + 128)+rdi],xmm4 |
| movdqu XMMWORD[(32 + 128)+rdi],xmm8 |
| movdqu XMMWORD[(48 + 128)+rdi],xmm15 |
| |
| ; 192 input bytes consumed. rdi is deliberately NOT advanced: the main-rounds |
| ; loop hashes the bytes at rdi and steps rdi itself. |
| ; A dead "mov rcx,12*16" used to sit here; rcx was overwritten by "mov rcx,2" |
| ; below before anything read it. If this file is regenerated, drop the same |
| ; instruction in the Perl generator. |
| sub rbx,12*16 |
| lea rsi,[192+rsi] |
| ; rcx = 2, r8 = 8: with these counters $L$seal_sse_main_rounds performs |
| ; 10 double rounds while hashing 12 x 16 = 192 bytes at rdi. |
| mov rcx,2 |
| mov r8,8 |
| ; Pick a tail routine when at most 64 / 128 / 192 bytes remain; otherwise fall |
| ; through into the main loop. |
| cmp rbx,4*16 |
| jbe NEAR $L$seal_sse_tail_64 |
| cmp rbx,8*16 |
| jbe NEAR $L$seal_sse_tail_128 |
| cmp rbx,12*16 |
| jbe NEAR $L$seal_sse_tail_192 |
| |
| ; Top of the main loop: rebuild four fresh block states (block i = xmm(i), |
| ; xmm(4+i), xmm(8+i), xmm(12+i)) from the constants and the rows saved at |
| ; [rbp+160+48] and [rbp+160+64]. |
| $L$seal_sse_main_loop: |
| movdqa xmm0,XMMWORD[$L$chacha20_consts] |
| movdqa xmm4,XMMWORD[((160+48))+rbp] |
| movdqa xmm8,XMMWORD[((160+64))+rbp] |
| movdqa xmm1,xmm0 |
| movdqa xmm5,xmm4 |
| movdqa xmm9,xmm8 |
| movdqa xmm2,xmm0 |
| movdqa xmm6,xmm4 |
| movdqa xmm10,xmm8 |
| movdqa xmm3,xmm0 |
| movdqa xmm7,xmm4 |
| movdqa xmm11,xmm8 |
| ; D rows continue from the last one used (slot 160+96): |
| ; xmm15 = +1, xmm14 = +2, xmm13 = +3, xmm12 = +4 (dword 0, via $L$sse_inc). |
| movdqa xmm15,XMMWORD[((160+96))+rbp] |
| paddd xmm15,XMMWORD[$L$sse_inc] |
| movdqa xmm14,xmm15 |
| paddd xmm14,XMMWORD[$L$sse_inc] |
| movdqa xmm13,xmm14 |
| paddd xmm13,XMMWORD[$L$sse_inc] |
| movdqa xmm12,xmm13 |
| paddd xmm12,XMMWORD[$L$sse_inc] |
| ; Save the initial D rows for the feed-forward; slot 160+96 again ends up |
| ; holding the highest value handed out. |
| movdqa XMMWORD[(160+96)+rbp],xmm12 |
| movdqa XMMWORD[(160+112)+rbp],xmm13 |
| movdqa XMMWORD[(160+128)+rbp],xmm14 |
| movdqa XMMWORD[(160+144)+rbp],xmm15 |
| |
| ALIGN 32 |
| $L$seal_sse_main_rounds: |
| movdqa XMMWORD[(160+80)+rbp],xmm8 |
| movdqa xmm8,XMMWORD[$L$rol16] |
| paddd xmm3,xmm7 |
| paddd xmm2,xmm6 |
| paddd xmm1,xmm5 |
| paddd xmm0,xmm4 |
| pxor xmm15,xmm3 |
| pxor xmm14,xmm2 |
| pxor xmm13,xmm1 |
| pxor xmm12,xmm0 |
| DB 102,69,15,56,0,248 |
| DB 102,69,15,56,0,240 |
| DB 102,69,15,56,0,232 |
| DB 102,69,15,56,0,224 |
| movdqa xmm8,XMMWORD[((160+80))+rbp] |
| paddd xmm11,xmm15 |
| paddd xmm10,xmm14 |
| paddd xmm9,xmm13 |
| paddd xmm8,xmm12 |
| pxor xmm7,xmm11 |
| add r10,QWORD[((0+0))+rdi] |
| adc r11,QWORD[((8+0))+rdi] |
| adc r12,1 |
| pxor xmm6,xmm10 |
| pxor xmm5,xmm9 |
| pxor xmm4,xmm8 |
| movdqa XMMWORD[(160+80)+rbp],xmm8 |
| movdqa xmm8,xmm7 |
| psrld xmm8,20 |
| pslld xmm7,32-20 |
| pxor xmm7,xmm8 |
| movdqa xmm8,xmm6 |
| psrld xmm8,20 |
| pslld xmm6,32-20 |
| pxor xmm6,xmm8 |
| movdqa xmm8,xmm5 |
| psrld xmm8,20 |
| pslld xmm5,32-20 |
| pxor xmm5,xmm8 |
| movdqa xmm8,xmm4 |
| psrld xmm8,20 |
| pslld xmm4,32-20 |
| pxor xmm4,xmm8 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| movdqa xmm8,XMMWORD[$L$rol8] |
| paddd xmm3,xmm7 |
| paddd xmm2,xmm6 |
| paddd xmm1,xmm5 |
| paddd xmm0,xmm4 |
| pxor xmm15,xmm3 |
| pxor xmm14,xmm2 |
| pxor xmm13,xmm1 |
| pxor xmm12,xmm0 |
| DB 102,69,15,56,0,248 |
| DB 102,69,15,56,0,240 |
| DB 102,69,15,56,0,232 |
| DB 102,69,15,56,0,224 |
| movdqa xmm8,XMMWORD[((160+80))+rbp] |
| paddd xmm11,xmm15 |
| paddd xmm10,xmm14 |
| paddd xmm9,xmm13 |
| paddd xmm8,xmm12 |
| pxor xmm7,xmm11 |
| pxor xmm6,xmm10 |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| pxor xmm5,xmm9 |
| pxor xmm4,xmm8 |
| movdqa XMMWORD[(160+80)+rbp],xmm8 |
| movdqa xmm8,xmm7 |
| psrld xmm8,25 |
| pslld xmm7,32-25 |
| pxor xmm7,xmm8 |
| movdqa xmm8,xmm6 |
| psrld xmm8,25 |
| pslld xmm6,32-25 |
| pxor xmm6,xmm8 |
| movdqa xmm8,xmm5 |
| psrld xmm8,25 |
| pslld xmm5,32-25 |
| pxor xmm5,xmm8 |
| movdqa xmm8,xmm4 |
| psrld xmm8,25 |
| pslld xmm4,32-25 |
| pxor xmm4,xmm8 |
| movdqa xmm8,XMMWORD[((160+80))+rbp] |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| DB 102,15,58,15,255,4 |
| DB 102,69,15,58,15,219,8 |
| DB 102,69,15,58,15,255,12 |
| DB 102,15,58,15,246,4 |
| DB 102,69,15,58,15,210,8 |
| DB 102,69,15,58,15,246,12 |
| DB 102,15,58,15,237,4 |
| DB 102,69,15,58,15,201,8 |
| DB 102,69,15,58,15,237,12 |
| DB 102,15,58,15,228,4 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,12 |
| movdqa XMMWORD[(160+80)+rbp],xmm8 |
| movdqa xmm8,XMMWORD[$L$rol16] |
| paddd xmm3,xmm7 |
| paddd xmm2,xmm6 |
| paddd xmm1,xmm5 |
| paddd xmm0,xmm4 |
| pxor xmm15,xmm3 |
| pxor xmm14,xmm2 |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| pxor xmm13,xmm1 |
| pxor xmm12,xmm0 |
| DB 102,69,15,56,0,248 |
| DB 102,69,15,56,0,240 |
| DB 102,69,15,56,0,232 |
| DB 102,69,15,56,0,224 |
| movdqa xmm8,XMMWORD[((160+80))+rbp] |
| paddd xmm11,xmm15 |
| paddd xmm10,xmm14 |
| paddd xmm9,xmm13 |
| paddd xmm8,xmm12 |
| pxor xmm7,xmm11 |
| pxor xmm6,xmm10 |
| pxor xmm5,xmm9 |
| pxor xmm4,xmm8 |
| movdqa XMMWORD[(160+80)+rbp],xmm8 |
| movdqa xmm8,xmm7 |
| psrld xmm8,20 |
| pslld xmm7,32-20 |
| pxor xmm7,xmm8 |
| movdqa xmm8,xmm6 |
| psrld xmm8,20 |
| pslld xmm6,32-20 |
| pxor xmm6,xmm8 |
| movdqa xmm8,xmm5 |
| psrld xmm8,20 |
| pslld xmm5,32-20 |
| pxor xmm5,xmm8 |
| movdqa xmm8,xmm4 |
| psrld xmm8,20 |
| pslld xmm4,32-20 |
| pxor xmm4,xmm8 |
| movdqa xmm8,XMMWORD[$L$rol8] |
| paddd xmm3,xmm7 |
| paddd xmm2,xmm6 |
| paddd xmm1,xmm5 |
| paddd xmm0,xmm4 |
| pxor xmm15,xmm3 |
| pxor xmm14,xmm2 |
| pxor xmm13,xmm1 |
| pxor xmm12,xmm0 |
| DB 102,69,15,56,0,248 |
| DB 102,69,15,56,0,240 |
| DB 102,69,15,56,0,232 |
| DB 102,69,15,56,0,224 |
| movdqa xmm8,XMMWORD[((160+80))+rbp] |
| paddd xmm11,xmm15 |
| paddd xmm10,xmm14 |
| paddd xmm9,xmm13 |
| paddd xmm8,xmm12 |
| pxor xmm7,xmm11 |
| pxor xmm6,xmm10 |
| pxor xmm5,xmm9 |
| pxor xmm4,xmm8 |
| movdqa XMMWORD[(160+80)+rbp],xmm8 |
| movdqa xmm8,xmm7 |
| psrld xmm8,25 |
| pslld xmm7,32-25 |
| pxor xmm7,xmm8 |
| movdqa xmm8,xmm6 |
| psrld xmm8,25 |
| pslld xmm6,32-25 |
| pxor xmm6,xmm8 |
| movdqa xmm8,xmm5 |
| psrld xmm8,25 |
| pslld xmm5,32-25 |
| pxor xmm5,xmm8 |
| movdqa xmm8,xmm4 |
| psrld xmm8,25 |
| pslld xmm4,32-25 |
| pxor xmm4,xmm8 |
| movdqa xmm8,XMMWORD[((160+80))+rbp] |
| DB 102,15,58,15,255,12 |
| DB 102,69,15,58,15,219,8 |
| DB 102,69,15,58,15,255,4 |
| DB 102,15,58,15,246,12 |
| DB 102,69,15,58,15,210,8 |
| DB 102,69,15,58,15,246,4 |
| DB 102,15,58,15,237,12 |
| DB 102,69,15,58,15,201,8 |
| DB 102,69,15,58,15,237,4 |
| DB 102,15,58,15,228,12 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,4 |
| |
| lea rdi,[16+rdi] |
| dec r8 |
| jge NEAR $L$seal_sse_main_rounds |
| add r10,QWORD[((0+0))+rdi] |
| adc r11,QWORD[((8+0))+rdi] |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| lea rdi,[16+rdi] |
| dec rcx |
| jg NEAR $L$seal_sse_main_rounds |
| ; Main loop, after the rounds: feed-forward. Add the initial state (constants, |
| ; rows saved at 160+48 / 160+64, and each block's own D row) into all four |
| ; blocks. |
| paddd xmm3,XMMWORD[$L$chacha20_consts] |
| paddd xmm7,XMMWORD[((160+48))+rbp] |
| paddd xmm11,XMMWORD[((160+64))+rbp] |
| paddd xmm15,XMMWORD[((160+144))+rbp] |
| paddd xmm2,XMMWORD[$L$chacha20_consts] |
| paddd xmm6,XMMWORD[((160+48))+rbp] |
| paddd xmm10,XMMWORD[((160+64))+rbp] |
| paddd xmm14,XMMWORD[((160+128))+rbp] |
| paddd xmm1,XMMWORD[$L$chacha20_consts] |
| paddd xmm5,XMMWORD[((160+48))+rbp] |
| paddd xmm9,XMMWORD[((160+64))+rbp] |
| paddd xmm13,XMMWORD[((160+112))+rbp] |
| paddd xmm0,XMMWORD[$L$chacha20_consts] |
| paddd xmm4,XMMWORD[((160+48))+rbp] |
| paddd xmm8,XMMWORD[((160+64))+rbp] |
| paddd xmm12,XMMWORD[((160+96))+rbp] |
| |
| ; Bytes 0..63: input XOR block 3 (xmm3,7,11,15). All sixteen xmm registers are |
| ; live, so xmm14 is spilled to [rbp+160+80] and used as the scratch register. |
| ; (This spill used to be emitted twice back to back; one store is sufficient. |
| ; If this file is regenerated, fix the duplicate in the Perl generator too.) |
| movdqa XMMWORD[(160+80)+rbp],xmm14 |
| movdqu xmm14,XMMWORD[((0 + 0))+rsi] |
| pxor xmm14,xmm3 |
| movdqu XMMWORD[(0 + 0)+rdi],xmm14 |
| movdqu xmm14,XMMWORD[((16 + 0))+rsi] |
| pxor xmm14,xmm7 |
| movdqu XMMWORD[(16 + 0)+rdi],xmm14 |
| movdqu xmm14,XMMWORD[((32 + 0))+rsi] |
| pxor xmm14,xmm11 |
| movdqu XMMWORD[(32 + 0)+rdi],xmm14 |
| movdqu xmm14,XMMWORD[((48 + 0))+rsi] |
| pxor xmm14,xmm15 |
| movdqu XMMWORD[(48 + 0)+rdi],xmm14 |
| |
| ; Restore block 2's D row, then bytes 64..127: input XOR block 2 (xmm2,6,10,14). |
| movdqa xmm14,XMMWORD[((160+80))+rbp] |
| movdqu xmm3,XMMWORD[((0 + 64))+rsi] |
| movdqu xmm7,XMMWORD[((16 + 64))+rsi] |
| movdqu xmm11,XMMWORD[((32 + 64))+rsi] |
| movdqu xmm15,XMMWORD[((48 + 64))+rsi] |
| pxor xmm2,xmm3 |
| pxor xmm6,xmm7 |
| pxor xmm10,xmm11 |
| pxor xmm15,xmm14 |
| movdqu XMMWORD[(0 + 64)+rdi],xmm2 |
| movdqu XMMWORD[(16 + 64)+rdi],xmm6 |
| movdqu XMMWORD[(32 + 64)+rdi],xmm10 |
| movdqu XMMWORD[(48 + 64)+rdi],xmm15 |
| ; Bytes 128..191: input XOR block 1 (xmm1,5,9,13). |
| movdqu xmm3,XMMWORD[((0 + 128))+rsi] |
| movdqu xmm7,XMMWORD[((16 + 128))+rsi] |
| movdqu xmm11,XMMWORD[((32 + 128))+rsi] |
| movdqu xmm15,XMMWORD[((48 + 128))+rsi] |
| pxor xmm1,xmm3 |
| pxor xmm5,xmm7 |
| pxor xmm9,xmm11 |
| pxor xmm15,xmm13 |
| movdqu XMMWORD[(0 + 128)+rdi],xmm1 |
| movdqu XMMWORD[(16 + 128)+rdi],xmm5 |
| movdqu XMMWORD[(32 + 128)+rdi],xmm9 |
| movdqu XMMWORD[(48 + 128)+rdi],xmm15 |
| |
| ; More than 256 bytes left: also consume block 0 at $L$seal_sse_main_loop_xor. |
| ; Otherwise stop after 192 bytes: rcx = 12*16 (the number of bytes just written |
| ; at rdi), rbx and rsi are advanced, rdi is left where it is, and block 0 |
| ; (xmm0,4,8,12) stays available as keystream. |
| cmp rbx,16*16 |
| ja NEAR $L$seal_sse_main_loop_xor |
| |
| mov rcx,12*16 |
| sub rbx,12*16 |
| lea rsi,[192+rsi] |
| jmp NEAR $L$seal_sse_128_tail_hash |
| $L$seal_sse_main_loop_xor: |
; Completes one 256-byte pass: XORs the 64 bytes at offsets 192..255 of the
; source (rsi) with the block held in xmm0/xmm4/xmm8/xmm12, writes the result
; to the same offsets of the destination (rdi), then chooses what runs next
; from the byte count left in rbx.
; xmm3/xmm7/xmm11/xmm15 are scratch registers for the 64 bytes loaded from rsi.
| movdqu xmm3,XMMWORD[((0 + 192))+rsi] |
| movdqu xmm7,XMMWORD[((16 + 192))+rsi] |
| movdqu xmm11,XMMWORD[((32 + 192))+rsi] |
| movdqu xmm15,XMMWORD[((48 + 192))+rsi] |
| pxor xmm0,xmm3 |
| pxor xmm4,xmm7 |
| pxor xmm8,xmm11 |
; The last 16 bytes are combined into the scratch register instead, so the
; value to store ends up in xmm15 and xmm12 itself is not modified.
| pxor xmm15,xmm12 |
| movdqu XMMWORD[(0 + 192)+rdi],xmm0 |
| movdqu XMMWORD[(16 + 192)+rdi],xmm4 |
| movdqu XMMWORD[(32 + 192)+rdi],xmm8 |
| movdqu XMMWORD[(48 + 192)+rdi],xmm15 |
| |
; 256 bytes have been consumed: advance the source pointer and reduce the
; remaining count by the same amount. rdi is not advanced here.
| lea rsi,[256+rsi] |
| sub rbx,16*16 |
; Reload the two counters that the bottom of $L$seal_sse_main_rounds
; decrements (dec r8 / jge, then dec rcx / jg).
| mov rcx,6 |
| mov r8,4 |
; More than 192 bytes left: run another full pass.
; NOTE(review): this is a signed jg while the neighbouring size checks use the
; unsigned ja; the two only differ when rbx has its top bit set - confirm that
; cannot happen for the lengths passed in.
| cmp rbx,12*16 |
| jg NEAR $L$seal_sse_main_loop |
; Nothing left: jump to $L$seal_sse_128_tail_hash with rcx = rbx = 0. The other
; visible jump to that label passes 12*16 in rcx.
| mov rcx,rbx |
| test rbx,rbx |
| je NEAR $L$seal_sse_128_tail_hash |
; 1..192 bytes left: restore rcx = 6 (r8 is still 4) and pick a tail routine:
;   more than 128 bytes -> $L$seal_sse_tail_192
;   more than  64 bytes -> $L$seal_sse_tail_128
;   otherwise           -> fall through into $L$seal_sse_tail_64
| mov rcx,6 |
| cmp rbx,8*16 |
| ja NEAR $L$seal_sse_tail_192 |
| cmp rbx,4*16 |
| ja NEAR $L$seal_sse_tail_128 |
| |
| $L$seal_sse_tail_64: |
; Tail path for at most 64 remaining bytes: builds one more ChaCha20 block in
; xmm0/xmm4/xmm8/xmm12 while continuing to absorb 16-byte blocks at rdi into
; the hash accumulator r10:r11:r12.
; Register use inside this block:
;   xmm0, xmm4, xmm8, xmm12  the four rows of the ChaCha20 state
;   xmm3                     scratch for the rotate-by-shift sequences
;   r10:r11:r12              hash accumulator (r12 holds the bits above 2^128)
;   r13, r14, r15, r9, rax, rdx  scratch for the multiply and reduction
;   rcx, r8                  loop counters (see the loop tail below)
;
; Row 0 is the constant "expand 32-byte k"; rows 1 and 2 are reloaded from the
; save slots at rbp+160+48 and rbp+160+64. Row 3 comes from rbp+160+96 with
; its low dword incremented by one ($L$sse_inc is DD 1,0,0,0), and the
; incremented value is written back to the slot.
| movdqa xmm0,XMMWORD[$L$chacha20_consts] |
| movdqa xmm4,XMMWORD[((160+48))+rbp] |
| movdqa xmm8,XMMWORD[((160+64))+rbp] |
| movdqa xmm12,XMMWORD[((160+96))+rbp] |
| paddd xmm12,XMMWORD[$L$sse_inc] |
| movdqa XMMWORD[(160+96)+rbp],xmm12 |
| |
| $L$seal_sse_tail_64_rounds_and_x2hash: |
; Entry used while rcx is still positive: absorb one extra 16-byte block before
; the round, so each pass through here hashes two blocks in total.
;
; Absorb: accumulator += 16 bytes at [rdi], plus 1 in the third limb (2^128).
| add r10,QWORD[((0+0))+rdi] |
| adc r11,QWORD[((8+0))+rdi] |
| adc r12,1 |
; Multiply the accumulator by the 128-bit value stored at rbp+160 (low qword)
; and rbp+168 (high qword) - presumably the clamped Poly1305 key "r", which is
; set up outside this chunk.
; First the low qword: r13 = bits 0..63, r14 = bits 64..127, r15 = bits 128+.
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
; Then the high qword, accumulated one limb higher; r9 becomes the top limb.
; r10 is reused as a temporary for the carry-out limb once it has been read.
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
; Reduce the product r9:r15:r14:r13 back to three limbs. The low two limbs and
; the low 2 bits of r15 are kept; the part above bit 130 (call it t) is added
; back as 4*t + t = 5*t, i.e. reduction modulo 2^130 - 5:
;   r14:r13 (after the moves) = r9:(r15 & -4) = 4*t
;   r9:r15  (after shrd/shr)  = t
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
; Step past the block just absorbed.
| lea rdi,[16+rdi] |
| $L$seal_sse_tail_64_rounds_and_x1hash: |
; One ChaCha20 double round on the single block, followed by one hash block.
; First half: quarter-round on the rows as loaded. The rotations are by 16
; (byte shuffle), 12 (shift pair via xmm3), 8 (byte shuffle) and 7 (shift pair).
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol16] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,12 |
| psrld xmm4,20 |
| pxor xmm4,xmm3 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol8] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,7 |
| psrld xmm4,25 |
| pxor xmm4,xmm3 |
; Hand-encoded palignr (66 [REX] 0F 3A 0F /r ib), rotating rows 1..3 so the
; second half works on the diagonals:
;   palignr xmm4,xmm4,4 / palignr xmm8,xmm8,8 / palignr xmm12,xmm12,12
| DB 102,15,58,15,228,4 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,12 |
; Second half: the same quarter-round on the rotated rows.
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol16] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,12 |
| psrld xmm4,20 |
| pxor xmm4,xmm3 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol8] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,7 |
| psrld xmm4,25 |
| pxor xmm4,xmm3 |
; Rotate the rows back into their original lanes:
;   palignr xmm4,xmm4,12 / palignr xmm8,xmm8,8 / palignr xmm12,xmm12,4
| DB 102,15,58,15,228,12 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,4 |
; Absorb the next 16-byte block at [rdi]: the same add / multiply / reduce
; sequence as at $L$seal_sse_tail_64_rounds_and_x2hash.
| add r10,QWORD[((0+0))+rdi] |
| adc r11,QWORD[((8+0))+rdi] |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| lea rdi,[16+rdi] |
; Loop control: while rcx is still positive after the decrement, repeat with
; the extra hash block; afterwards keep repeating the round-plus-one-hash part
; until r8 goes negative. Entered by fall-through from the size dispatch with
; rcx = 6 and r8 = 4, that is 6 + 4 = 10 double rounds and 6*2 + 4 = 16
; absorbed blocks (256 bytes).
| dec rcx |
| jg NEAR $L$seal_sse_tail_64_rounds_and_x2hash |
| dec r8 |
| jge NEAR $L$seal_sse_tail_64_rounds_and_x1hash |
; Add the input state back in: the constants, the two saved rows and the
; row-3 value stored at rbp+160+96 at the top of this block.
| paddd xmm0,XMMWORD[$L$chacha20_consts] |
| paddd xmm4,XMMWORD[((160+48))+rbp] |
| paddd xmm8,XMMWORD[((160+64))+rbp] |
| paddd xmm12,XMMWORD[((160+96))+rbp] |
| |
; Continue at $L$seal_sse_128_tail_xor (defined outside this chunk) with the
; finished block in xmm0/xmm4/xmm8/xmm12.
| jmp NEAR $L$seal_sse_128_tail_xor |
| |
| $L$seal_sse_tail_128: |
| movdqa xmm0,XMMWORD[$L$chacha20_consts] |
| movdqa xmm4,XMMWORD[((160+48))+rbp] |
| movdqa xmm8,XMMWORD[((160+64))+rbp] |
| movdqa xmm1,xmm0 |
| movdqa xmm5,xmm4 |
| movdqa xmm9,xmm8 |
| movdqa xmm13,XMMWORD[((160+96))+rbp] |
| paddd xmm13,XMMWORD[$L$sse_inc] |
| movdqa xmm12,xmm13 |
| paddd xmm12,XMMWORD[$L$sse_inc] |
| movdqa XMMWORD[(160+96)+rbp],xmm12 |
| movdqa XMMWORD[(160+112)+rbp],xmm13 |
| |
| $L$seal_sse_tail_128_rounds_and_x2hash: |
| add r10,QWORD[((0+0))+rdi] |
| adc r11,QWORD[((8+0))+rdi] |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+rbp] |
| mov r15,rax |
| mul r10 |
| mov r13,rax |
| mov r14,rdx |
| mov rax,QWORD[((0+160+0))+rbp] |
| mul r11 |
| imul r15,r12 |
| add r14,rax |
| adc r15,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mov r9,rax |
| mul r10 |
| add r14,rax |
| adc rdx,0 |
| mov r10,rdx |
| mov rax,QWORD[((8+160+0))+rbp] |
| mul r11 |
| add r15,rax |
| adc rdx,0 |
| imul r9,r12 |
| add r15,r10 |
| adc r9,rdx |
| mov r10,r13 |
| mov r11,r14 |
| mov r12,r15 |
| and r12,3 |
| mov r13,r15 |
| and r13,-4 |
| mov r14,r9 |
| shrd r15,r9,2 |
| shr r9,2 |
| add r15,r13 |
| adc r9,r14 |
| add r10,r15 |
| adc r11,r9 |
| adc r12,0 |
| |
| lea rdi,[16+rdi] |
| $L$seal_sse_tail_128_rounds_and_x1hash: |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol16] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,12 |
| psrld xmm4,20 |
| pxor xmm4,xmm3 |
| paddd xmm0,xmm4 |
| pxor xmm12,xmm0 |
| pshufb xmm12,XMMWORD[$L$rol8] |
| paddd xmm8,xmm12 |
| pxor xmm4,xmm8 |
| movdqa xmm3,xmm4 |
| pslld xmm3,7 |
| psrld xmm4,25 |
| pxor xmm4,xmm3 |
| DB 102,15,58,15,228,4 |
| DB 102,69,15,58,15,192,8 |
| DB 102,69,15,58,15,228,12 |
| paddd xmm1,xmm5 |
| pxor xmm13,xmm1 |
| pshufb xmm13,XMMWORD[$L$rol16] |
| paddd xmm9,xmm13 |
| pxor xmm5,xmm9 |
| movdqa xmm3,xmm5 |
| pslld xmm3,12 |
| psrld xmm5,20 |
| pxor xmm5,xmm3 |
| paddd xmm1,xmm5 |
| pxor xmm13,xmm1 |
| pshufb xmm13,XMMWORD[$L$rol8] |
| paddd xmm9,xmm13 |
| pxor xmm5,xmm9 |
| movdqa xmm3,xmm5 |
| pslld xmm3,7 |
| psrld xmm5,25 |
| pxor xmm5,xmm3 |
| DB 102,15,58,15,237,4 |
| DB 102,69,15,58,15,201,8 |
| DB 102,69,15,58,15,237,12 |
| add r10,QWORD[((0+0))+rdi] |
| adc r11,QWORD[((8+0))+rdi] |
| adc r12,1 |
| mov rax,QWORD[((0+160+0))+ |