|  | // This file is generated from a similarly-named Perl script in the BoringSSL | 
|  | // source tree. Do not edit by hand. | 
|  |  | 
|  | #include <openssl/asm_base.h> | 
|  |  | 
|  | #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__APPLE__) | 
|  | .section	__TEXT,__const | 
|  |  | 
|  | .align	5 | 
// Lsigma: the 16 bytes "expand 32-byte k" in ASCII (the two quads below are
// that string read as little-endian 64-bit values). Both routines load it
// as the first four 32-bit words of the cipher state.
|  | Lsigma: | 
|  | .quad	0x3320646e61707865,0x6b20657479622d32		// endian-neutral | 
// Lone: the vector {1,0,0,0}; adding it to a counter vector bumps only
// 32-bit lane 0. The NEON code reaches it through x5 after a post-indexed
// 16-byte load of Lsigma, so Lone must stay immediately after Lsigma.
|  | Lone: | 
|  | .long	1,0,0,0 | 
// NUL-terminated ASCII identification string:
// "ChaCha20 for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
|  | .byte	67,104,97,67,104,97,50,48,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 | 
|  | .align	2 | 
|  |  | 
|  | .text | 
|  |  | 
// ---------------------------------------------------------------------
// _ChaCha20_ctr32_nohw
//
// ChaCha20 keystream XOR using general-purpose registers only, one
// 64-byte block per outer iteration.
//
// Register roles, as used by the code below:
//   x0  out pointer  (stored to; advanced by 64 per full block)
//   x1  in pointer   (loaded from; advanced by 64 per full block)
//   x2  byte count   (need not be a multiple of 64; see Ltail)
//   x3  pointer to 32 bytes of key (read only)
//   x4  pointer to 16 bytes of counter block (read only); x4 is reused
//       as the round counter once the counter block has been loaded
//   x22,x23      sigma  (state words 0-3, two words per register)
//   x24-x27      key    (state words 4-11)
//   x28,x30      counter block (state words 12-15); x30 is used as an
//                ordinary register here and is reloaded from the frame
//                on exit
//   w5-w17,w19-w21  the 16 working state words (x18 is never touched)
//
// Stack: a 96-byte frame saves x29,x30 and x19-x28; a further 64 bytes
// below it are scratch used only by the partial-block tail.
//
// NOTE(review): a byte count of zero is not handled in this routine.
// With x2 == 0 the tail loop is entered with a zero index and cbnz does
// not terminate it after the first byte. Presumably the caller
// guarantees a non-zero length - confirm.
// NOTE(review): AARCH64_SIGN_LINK_REGISTER / AARCH64_VALIDATE_LINK_REGISTER
// are not defined in this file; presumably they come from
// <openssl/asm_base.h>.
// ---------------------------------------------------------------------
|  | .globl	_ChaCha20_ctr32_nohw | 
|  | .private_extern	_ChaCha20_ctr32_nohw | 
|  |  | 
|  | .align	5 | 
|  | _ChaCha20_ctr32_nohw: | 
|  | AARCH64_SIGN_LINK_REGISTER | 
|  | stp	x29,x30,[sp,#-96]! | 
|  | add	x29,sp,#0 | 
|  |  | 
|  | adrp	x5,Lsigma@PAGE | 
|  | add	x5,x5,Lsigma@PAGEOFF | 
|  | stp	x19,x20,[sp,#16] | 
|  | stp	x21,x22,[sp,#32] | 
|  | stp	x23,x24,[sp,#48] | 
|  | stp	x25,x26,[sp,#64] | 
|  | stp	x27,x28,[sp,#80] | 
// 64 bytes of scratch for the partial-block tail; x29 keeps pointing at
// the saved-register frame so the epilogue can reload through it.
|  | sub	sp,sp,#64 | 
|  |  | 
|  | ldp	x22,x23,[x5]		// load sigma | 
|  | ldp	x24,x25,[x3]		// load key | 
|  | ldp	x26,x27,[x3,#16] | 
|  | ldp	x28,x30,[x4]		// load counter | 
|  | #ifdef	__AARCH64EB__ | 
|  | ror	x24,x24,#32 | 
|  | ror	x25,x25,#32 | 
|  | ror	x26,x26,#32 | 
|  | ror	x27,x27,#32 | 
|  | ror	x28,x28,#32 | 
|  | ror	x30,x30,#32 | 
|  | #endif | 
|  |  | 
// One pass of Loop_outer produces one 64-byte keystream block. The packed
// input state in x22-x28,x30 is split into sixteen 32-bit working words:
// even words via a w-register move, odd words via a 32-bit right shift.
|  | Loop_outer: | 
|  | mov	w5,w22			// unpack key block | 
|  | lsr	x6,x22,#32 | 
|  | mov	w7,w23 | 
|  | lsr	x8,x23,#32 | 
|  | mov	w9,w24 | 
|  | lsr	x10,x24,#32 | 
|  | mov	w11,w25 | 
|  | lsr	x12,x25,#32 | 
|  | mov	w13,w26 | 
|  | lsr	x14,x26,#32 | 
|  | mov	w15,w27 | 
|  | lsr	x16,x27,#32 | 
|  | mov	w17,w28 | 
|  | lsr	x19,x28,#32 | 
|  | mov	w20,w30 | 
|  | lsr	x21,x30,#32 | 
|  |  | 
// x4 = 10 loop iterations; each iteration below is a column round followed
// by a diagonal round. The flags set by the subs are consumed much later
// by "b.lo Ltail" and "b.hi Loop_outer": no instruction in between writes
// the flags (add/sub/eor/ror are the non-flag-setting forms, cbnz tests a
// register), so the comparison result stays live across the whole block.
|  | mov	x4,#10 | 
|  | subs	x2,x2,#64 | 
|  | Loop: | 
|  | sub	x4,x4,#1 | 
// Column round: four quarter-rounds on working words (5,9,13,17),
// (6,10,14,19), (7,11,15,20), (8,12,16,21), interleaved instruction by
// instruction. The rotations are rotate-right by 16, 20, 24 and 25.
|  | add	w5,w5,w9 | 
|  | add	w6,w6,w10 | 
|  | add	w7,w7,w11 | 
|  | add	w8,w8,w12 | 
|  | eor	w17,w17,w5 | 
|  | eor	w19,w19,w6 | 
|  | eor	w20,w20,w7 | 
|  | eor	w21,w21,w8 | 
|  | ror	w17,w17,#16 | 
|  | ror	w19,w19,#16 | 
|  | ror	w20,w20,#16 | 
|  | ror	w21,w21,#16 | 
|  | add	w13,w13,w17 | 
|  | add	w14,w14,w19 | 
|  | add	w15,w15,w20 | 
|  | add	w16,w16,w21 | 
|  | eor	w9,w9,w13 | 
|  | eor	w10,w10,w14 | 
|  | eor	w11,w11,w15 | 
|  | eor	w12,w12,w16 | 
|  | ror	w9,w9,#20 | 
|  | ror	w10,w10,#20 | 
|  | ror	w11,w11,#20 | 
|  | ror	w12,w12,#20 | 
|  | add	w5,w5,w9 | 
|  | add	w6,w6,w10 | 
|  | add	w7,w7,w11 | 
|  | add	w8,w8,w12 | 
|  | eor	w17,w17,w5 | 
|  | eor	w19,w19,w6 | 
|  | eor	w20,w20,w7 | 
|  | eor	w21,w21,w8 | 
|  | ror	w17,w17,#24 | 
|  | ror	w19,w19,#24 | 
|  | ror	w20,w20,#24 | 
|  | ror	w21,w21,#24 | 
|  | add	w13,w13,w17 | 
|  | add	w14,w14,w19 | 
|  | add	w15,w15,w20 | 
|  | add	w16,w16,w21 | 
|  | eor	w9,w9,w13 | 
|  | eor	w10,w10,w14 | 
|  | eor	w11,w11,w15 | 
|  | eor	w12,w12,w16 | 
|  | ror	w9,w9,#25 | 
|  | ror	w10,w10,#25 | 
|  | ror	w11,w11,#25 | 
|  | ror	w12,w12,#25 | 
// Diagonal round: same quarter-round on (5,10,15,21), (6,11,16,17),
// (7,12,13,19), (8,9,14,20). The diagonals are selected purely by
// register choice; no data is moved.
|  | add	w5,w5,w10 | 
|  | add	w6,w6,w11 | 
|  | add	w7,w7,w12 | 
|  | add	w8,w8,w9 | 
|  | eor	w21,w21,w5 | 
|  | eor	w17,w17,w6 | 
|  | eor	w19,w19,w7 | 
|  | eor	w20,w20,w8 | 
|  | ror	w21,w21,#16 | 
|  | ror	w17,w17,#16 | 
|  | ror	w19,w19,#16 | 
|  | ror	w20,w20,#16 | 
|  | add	w15,w15,w21 | 
|  | add	w16,w16,w17 | 
|  | add	w13,w13,w19 | 
|  | add	w14,w14,w20 | 
|  | eor	w10,w10,w15 | 
|  | eor	w11,w11,w16 | 
|  | eor	w12,w12,w13 | 
|  | eor	w9,w9,w14 | 
|  | ror	w10,w10,#20 | 
|  | ror	w11,w11,#20 | 
|  | ror	w12,w12,#20 | 
|  | ror	w9,w9,#20 | 
|  | add	w5,w5,w10 | 
|  | add	w6,w6,w11 | 
|  | add	w7,w7,w12 | 
|  | add	w8,w8,w9 | 
|  | eor	w21,w21,w5 | 
|  | eor	w17,w17,w6 | 
|  | eor	w19,w19,w7 | 
|  | eor	w20,w20,w8 | 
|  | ror	w21,w21,#24 | 
|  | ror	w17,w17,#24 | 
|  | ror	w19,w19,#24 | 
|  | ror	w20,w20,#24 | 
|  | add	w15,w15,w21 | 
|  | add	w16,w16,w17 | 
|  | add	w13,w13,w19 | 
|  | add	w14,w14,w20 | 
|  | eor	w10,w10,w15 | 
|  | eor	w11,w11,w16 | 
|  | eor	w12,w12,w13 | 
|  | eor	w9,w9,w14 | 
|  | ror	w10,w10,#25 | 
|  | ror	w11,w11,#25 | 
|  | ror	w12,w12,#25 | 
|  | ror	w9,w9,#25 | 
|  | cbnz	x4,Loop | 
|  |  | 
// Add the original input state back into the working words. Odd words use
// a 64-bit add with "lsr #32"; a carry out of bit 31 may land in bit 32,
// but it is discarded when the word is shifted left by 32 during packing.
|  | add	w5,w5,w22		// accumulate key block | 
|  | add	x6,x6,x22,lsr#32 | 
|  | add	w7,w7,w23 | 
|  | add	x8,x8,x23,lsr#32 | 
|  | add	w9,w9,w24 | 
|  | add	x10,x10,x24,lsr#32 | 
|  | add	w11,w11,w25 | 
|  | add	x12,x12,x25,lsr#32 | 
|  | add	w13,w13,w26 | 
|  | add	x14,x14,x26,lsr#32 | 
|  | add	w15,w15,w27 | 
|  | add	x16,x16,x27,lsr#32 | 
|  | add	w17,w17,w28 | 
|  | add	x19,x19,x28,lsr#32 | 
|  | add	w20,w20,w30 | 
|  | add	x21,x21,x30,lsr#32 | 
|  |  | 
// Flags are still those of "subs x2,x2,#64": lower means fewer than 64
// bytes were left, so finish through the byte-wise tail.
|  | b.lo	Ltail | 
|  |  | 
// Full block: pack word pairs into 64-bit registers while loading 64 input
// bytes into the registers just freed (x6,x8,x10,x12,x14,x16,x19,x21).
|  | add	x5,x5,x6,lsl#32	// pack | 
|  | add	x7,x7,x8,lsl#32 | 
|  | ldp	x6,x8,[x1,#0]		// load input | 
|  | add	x9,x9,x10,lsl#32 | 
|  | add	x11,x11,x12,lsl#32 | 
|  | ldp	x10,x12,[x1,#16] | 
|  | add	x13,x13,x14,lsl#32 | 
|  | add	x15,x15,x16,lsl#32 | 
|  | ldp	x14,x16,[x1,#32] | 
|  | add	x17,x17,x19,lsl#32 | 
|  | add	x20,x20,x21,lsl#32 | 
|  | ldp	x19,x21,[x1,#48] | 
|  | add	x1,x1,#64 | 
|  | #ifdef	__AARCH64EB__ | 
|  | rev	x5,x5 | 
|  | rev	x7,x7 | 
|  | rev	x9,x9 | 
|  | rev	x11,x11 | 
|  | rev	x13,x13 | 
|  | rev	x15,x15 | 
|  | rev	x17,x17 | 
|  | rev	x20,x20 | 
|  | #endif | 
|  | eor	x5,x5,x6 | 
|  | eor	x7,x7,x8 | 
|  | eor	x9,x9,x10 | 
|  | eor	x11,x11,x12 | 
|  | eor	x13,x13,x14 | 
|  | eor	x15,x15,x16 | 
|  | eor	x17,x17,x19 | 
|  | eor	x20,x20,x21 | 
|  |  | 
// The counter step is a 64-bit add on x28, which holds counter-block
// words 0 (low half) and 1 (high half).
|  | stp	x5,x7,[x0,#0]		// store output | 
|  | add	x28,x28,#1			// increment counter | 
|  | stp	x9,x11,[x0,#16] | 
|  | stp	x13,x15,[x0,#32] | 
|  | stp	x17,x20,[x0,#48] | 
|  | add	x0,x0,#64 | 
|  |  | 
// Same subs flags again: higher means more than 64 bytes were left before
// this block, so at least one byte remains.
|  | b.hi	Loop_outer | 
|  |  | 
// Exactly zero bytes remain: restore callee-saved registers through x29,
// drop the scratch area and the frame, and return.
|  | ldp	x19,x20,[x29,#16] | 
|  | add	sp,sp,#64 | 
|  | ldp	x21,x22,[x29,#32] | 
|  | ldp	x23,x24,[x29,#48] | 
|  | ldp	x25,x26,[x29,#64] | 
|  | ldp	x27,x28,[x29,#80] | 
|  | ldp	x29,x30,[sp],#96 | 
|  | AARCH64_VALIDATE_LINK_REGISTER | 
|  | ret | 
|  |  | 
|  | .align	4 | 
// Partial final block. Undo the subs so x2 is the number of remaining
// bytes (fewer than 64).
|  | Ltail: | 
|  | add	x2,x2,#64 | 
// Less_than_64 is also branched to from _ChaCha20_ctr32_neon, which uses
// the same frame layout and the same 64-byte scratch area at sp.
// Pointer setup: x1 and x4 point one past the end of the input and of the
// keystream bytes in scratch, x0 points at (end of output - 1), and x2
// becomes a negative index that counts up to zero. The store in the loop
// happens after the index increment, which is why x0 is biased by -1.
|  | Less_than_64: | 
|  | sub	x0,x0,#1 | 
|  | add	x1,x1,x2 | 
|  | add	x0,x0,x2 | 
|  | add	x4,sp,x2 | 
|  | neg	x2,x2 | 
|  |  | 
|  | add	x5,x5,x6,lsl#32	// pack | 
|  | add	x7,x7,x8,lsl#32 | 
|  | add	x9,x9,x10,lsl#32 | 
|  | add	x11,x11,x12,lsl#32 | 
|  | add	x13,x13,x14,lsl#32 | 
|  | add	x15,x15,x16,lsl#32 | 
|  | add	x17,x17,x19,lsl#32 | 
|  | add	x20,x20,x21,lsl#32 | 
|  | #ifdef	__AARCH64EB__ | 
|  | rev	x5,x5 | 
|  | rev	x7,x7 | 
|  | rev	x9,x9 | 
|  | rev	x11,x11 | 
|  | rev	x13,x13 | 
|  | rev	x15,x15 | 
|  | rev	x17,x17 | 
|  | rev	x20,x20 | 
|  | #endif | 
// Spill the whole 64-byte keystream block to the stack scratch area.
|  | stp	x5,x7,[sp,#0] | 
|  | stp	x9,x11,[sp,#16] | 
|  | stp	x13,x15,[sp,#32] | 
|  | stp	x17,x20,[sp,#48] | 
|  |  | 
// XOR one input byte with one keystream byte per iteration.
|  | Loop_tail: | 
|  | ldrb	w10,[x1,x2] | 
|  | ldrb	w11,[x4,x2] | 
|  | add	x2,x2,#1 | 
|  | eor	w10,w10,w11 | 
|  | strb	w10,[x0,x2] | 
|  | cbnz	x2,Loop_tail | 
|  |  | 
// Overwrite the keystream copy in the scratch area with zeros before the
// stack space is released.
|  | stp	xzr,xzr,[sp,#0] | 
|  | stp	xzr,xzr,[sp,#16] | 
|  | stp	xzr,xzr,[sp,#32] | 
|  | stp	xzr,xzr,[sp,#48] | 
|  |  | 
|  | ldp	x19,x20,[x29,#16] | 
|  | add	sp,sp,#64 | 
|  | ldp	x21,x22,[x29,#32] | 
|  | ldp	x23,x24,[x29,#48] | 
|  | ldp	x25,x26,[x29,#64] | 
|  | ldp	x27,x28,[x29,#80] | 
|  | ldp	x29,x30,[sp],#96 | 
|  | AARCH64_VALIDATE_LINK_REGISTER | 
|  | ret | 
|  |  | 
|  |  | 
// ---------------------------------------------------------------------
// _ChaCha20_ctr32_neon
//
// ChaCha20 keystream XOR producing 256 bytes per outer iteration: one
// block is computed in general-purpose registers (same register roles as
// in _ChaCha20_ctr32_nohw) and three more blocks in NEON registers, with
// the two instruction streams interleaved line by line.
//
// Pointer/length registers: x0 = out, x1 = in, x2 = byte count,
// x3 = 32-byte key, x4 = 16-byte counter block (x4 is later reused as the
// round counter).
//
// NEON layout on this path (one 4-word state row per register):
//   v24 sigma, v25 key words 0-3, v26 key words 4-7   (input rows 0-2)
//   v27,v28,v29  counter rows for NEON blocks 1,2,3 (counter +1,+2,+3)
//   v31          {1,0,0,0} from Lone, later shifted to {4,0,0,0}
//   v0-v3, v4-v7, v16-v19   working rows of NEON blocks 1,2,3
//   v20-v23      temporaries (rotations, input data)
// v8-v15 are not touched between here and Ldone_neon.
//
// Inputs of 512 bytes or more branch to L512_or_more_neon, which sits
// after an identical register-saving prologue further down the file.
// Remainders below 64 bytes branch to Less_than_64 inside
// _ChaCha20_ctr32_nohw; that relies on both routines using the same
// frame and the same 64-byte scratch area at sp.
//
// NOTE(review): as in the scalar routine, a byte count of zero ends up in
// the byte-wise tail loop with a zero index and is not handled here;
// presumably the caller guarantees a non-zero length - confirm.
// ---------------------------------------------------------------------
|  | .globl	_ChaCha20_ctr32_neon | 
|  | .private_extern	_ChaCha20_ctr32_neon | 
|  |  | 
|  | .align	5 | 
|  | _ChaCha20_ctr32_neon: | 
|  | AARCH64_SIGN_LINK_REGISTER | 
|  | stp	x29,x30,[sp,#-96]! | 
|  | add	x29,sp,#0 | 
|  |  | 
|  | adrp	x5,Lsigma@PAGE | 
|  | add	x5,x5,Lsigma@PAGEOFF | 
|  | stp	x19,x20,[sp,#16] | 
|  | stp	x21,x22,[sp,#32] | 
|  | stp	x23,x24,[sp,#48] | 
|  | stp	x25,x26,[sp,#64] | 
|  | stp	x27,x28,[sp,#80] | 
// Unsigned compare: 512 bytes or more take the wider code path. x5 already
// holds the address of Lsigma at the branch target.
|  | cmp	x2,#512 | 
|  | b.hs	L512_or_more_neon | 
|  |  | 
|  | sub	sp,sp,#64 | 
|  |  | 
// The input state is loaded twice: packed into x22-x28,x30 for the scalar
// block and as rows v24-v27 for the NEON blocks. The post-indexed load of
// sigma leaves x5 pointing at Lone, which is then read into v31.
|  | ldp	x22,x23,[x5]		// load sigma | 
|  | ld1	{v24.4s},[x5],#16 | 
|  | ldp	x24,x25,[x3]		// load key | 
|  | ldp	x26,x27,[x3,#16] | 
|  | ld1	{v25.4s,v26.4s},[x3] | 
|  | ldp	x28,x30,[x4]		// load counter | 
|  | ld1	{v27.4s},[x4] | 
|  | ld1	{v31.4s},[x5] | 
|  | #ifdef	__AARCH64EB__ | 
|  | rev64	v24.4s,v24.4s | 
|  | ror	x24,x24,#32 | 
|  | ror	x25,x25,#32 | 
|  | ror	x26,x26,#32 | 
|  | ror	x27,x27,#32 | 
|  | ror	x28,x28,#32 | 
|  | ror	x30,x30,#32 | 
|  | #endif | 
// The scalar block keeps the caller's counter value (x28); the three NEON
// blocks use counter +1, +2 and +3. v31 then becomes the per-iteration
// stride of 4.
|  | add	v27.4s,v27.4s,v31.4s		// += 1 | 
|  | add	v28.4s,v27.4s,v31.4s | 
|  | add	v29.4s,v28.4s,v31.4s | 
|  | shl	v31.4s,v31.4s,#2			// 1 -> 4 | 
|  |  | 
// One pass of Loop_outer_neon produces four 64-byte blocks. Scalar unpack
// (see _ChaCha20_ctr32_nohw) is interleaved with copying the input rows
// into the three sets of NEON working registers.
|  | Loop_outer_neon: | 
|  | mov	w5,w22			// unpack key block | 
|  | lsr	x6,x22,#32 | 
|  | mov	v0.16b,v24.16b | 
|  | mov	w7,w23 | 
|  | lsr	x8,x23,#32 | 
|  | mov	v4.16b,v24.16b | 
|  | mov	w9,w24 | 
|  | lsr	x10,x24,#32 | 
|  | mov	v16.16b,v24.16b | 
|  | mov	w11,w25 | 
|  | mov	v1.16b,v25.16b | 
|  | lsr	x12,x25,#32 | 
|  | mov	v5.16b,v25.16b | 
|  | mov	w13,w26 | 
|  | mov	v17.16b,v25.16b | 
|  | lsr	x14,x26,#32 | 
|  | mov	v3.16b,v27.16b | 
|  | mov	w15,w27 | 
|  | mov	v7.16b,v28.16b | 
|  | lsr	x16,x27,#32 | 
|  | mov	v19.16b,v29.16b | 
|  | mov	w17,w28 | 
|  | mov	v2.16b,v26.16b | 
|  | lsr	x19,x28,#32 | 
|  | mov	v6.16b,v26.16b | 
|  | mov	w20,w30 | 
|  | mov	v18.16b,v26.16b | 
|  | lsr	x21,x30,#32 | 
|  |  | 
// x4 = 10 iterations of (column round + diagonal round). The flags from
// this subs are consumed after the round loop by "b.lo Ltail_neon" and
// "b.hi Loop_outer_neon"; nothing in between writes the flags.
|  | mov	x4,#10 | 
|  | subs	x2,x2,#256 | 
|  | Loop_neon: | 
|  | sub	x4,x4,#1 | 
// Column round. The scalar stream is the same sequence as in
// _ChaCha20_ctr32_nohw. In the vector stream each instruction handles all
// four columns of one block at once; rotation by 16 is done with rev32 on
// halfwords, the other rotations with a ushr/sli pair through v20-v22.
|  | add	v0.4s,v0.4s,v1.4s | 
|  | add	w5,w5,w9 | 
|  | add	v4.4s,v4.4s,v5.4s | 
|  | add	w6,w6,w10 | 
|  | add	v16.4s,v16.4s,v17.4s | 
|  | add	w7,w7,w11 | 
|  | eor	v3.16b,v3.16b,v0.16b | 
|  | add	w8,w8,w12 | 
|  | eor	v7.16b,v7.16b,v4.16b | 
|  | eor	w17,w17,w5 | 
|  | eor	v19.16b,v19.16b,v16.16b | 
|  | eor	w19,w19,w6 | 
|  | rev32	v3.8h,v3.8h | 
|  | eor	w20,w20,w7 | 
|  | rev32	v7.8h,v7.8h | 
|  | eor	w21,w21,w8 | 
|  | rev32	v19.8h,v19.8h | 
|  | ror	w17,w17,#16 | 
|  | add	v2.4s,v2.4s,v3.4s | 
|  | ror	w19,w19,#16 | 
|  | add	v6.4s,v6.4s,v7.4s | 
|  | ror	w20,w20,#16 | 
|  | add	v18.4s,v18.4s,v19.4s | 
|  | ror	w21,w21,#16 | 
|  | eor	v20.16b,v1.16b,v2.16b | 
|  | add	w13,w13,w17 | 
|  | eor	v21.16b,v5.16b,v6.16b | 
|  | add	w14,w14,w19 | 
|  | eor	v22.16b,v17.16b,v18.16b | 
|  | add	w15,w15,w20 | 
|  | ushr	v1.4s,v20.4s,#20 | 
|  | add	w16,w16,w21 | 
|  | ushr	v5.4s,v21.4s,#20 | 
|  | eor	w9,w9,w13 | 
|  | ushr	v17.4s,v22.4s,#20 | 
|  | eor	w10,w10,w14 | 
|  | sli	v1.4s,v20.4s,#12 | 
|  | eor	w11,w11,w15 | 
|  | sli	v5.4s,v21.4s,#12 | 
|  | eor	w12,w12,w16 | 
|  | sli	v17.4s,v22.4s,#12 | 
|  | ror	w9,w9,#20 | 
|  | add	v0.4s,v0.4s,v1.4s | 
|  | ror	w10,w10,#20 | 
|  | add	v4.4s,v4.4s,v5.4s | 
|  | ror	w11,w11,#20 | 
|  | add	v16.4s,v16.4s,v17.4s | 
|  | ror	w12,w12,#20 | 
|  | eor	v20.16b,v3.16b,v0.16b | 
|  | add	w5,w5,w9 | 
|  | eor	v21.16b,v7.16b,v4.16b | 
|  | add	w6,w6,w10 | 
|  | eor	v22.16b,v19.16b,v16.16b | 
|  | add	w7,w7,w11 | 
|  | ushr	v3.4s,v20.4s,#24 | 
|  | add	w8,w8,w12 | 
|  | ushr	v7.4s,v21.4s,#24 | 
|  | eor	w17,w17,w5 | 
|  | ushr	v19.4s,v22.4s,#24 | 
|  | eor	w19,w19,w6 | 
|  | sli	v3.4s,v20.4s,#8 | 
|  | eor	w20,w20,w7 | 
|  | sli	v7.4s,v21.4s,#8 | 
|  | eor	w21,w21,w8 | 
|  | sli	v19.4s,v22.4s,#8 | 
|  | ror	w17,w17,#24 | 
|  | add	v2.4s,v2.4s,v3.4s | 
|  | ror	w19,w19,#24 | 
|  | add	v6.4s,v6.4s,v7.4s | 
|  | ror	w20,w20,#24 | 
|  | add	v18.4s,v18.4s,v19.4s | 
|  | ror	w21,w21,#24 | 
|  | eor	v20.16b,v1.16b,v2.16b | 
|  | add	w13,w13,w17 | 
|  | eor	v21.16b,v5.16b,v6.16b | 
|  | add	w14,w14,w19 | 
|  | eor	v22.16b,v17.16b,v18.16b | 
|  | add	w15,w15,w20 | 
|  | ushr	v1.4s,v20.4s,#25 | 
|  | add	w16,w16,w21 | 
|  | ushr	v5.4s,v21.4s,#25 | 
|  | eor	w9,w9,w13 | 
|  | ushr	v17.4s,v22.4s,#25 | 
|  | eor	w10,w10,w14 | 
|  | sli	v1.4s,v20.4s,#7 | 
|  | eor	w11,w11,w15 | 
|  | sli	v5.4s,v21.4s,#7 | 
|  | eor	w12,w12,w16 | 
|  | sli	v17.4s,v22.4s,#7 | 
|  | ror	w9,w9,#25 | 
// Rotate rows 1, 2 and 3 of every NEON block by 4, 8 and 12 bytes so that
// the diagonals line up in columns for the next half-round.
|  | ext	v2.16b,v2.16b,v2.16b,#8 | 
|  | ror	w10,w10,#25 | 
|  | ext	v6.16b,v6.16b,v6.16b,#8 | 
|  | ror	w11,w11,#25 | 
|  | ext	v18.16b,v18.16b,v18.16b,#8 | 
|  | ror	w12,w12,#25 | 
|  | ext	v3.16b,v3.16b,v3.16b,#12 | 
|  | ext	v7.16b,v7.16b,v7.16b,#12 | 
|  | ext	v19.16b,v19.16b,v19.16b,#12 | 
|  | ext	v1.16b,v1.16b,v1.16b,#4 | 
|  | ext	v5.16b,v5.16b,v5.16b,#4 | 
|  | ext	v17.16b,v17.16b,v17.16b,#4 | 
// Diagonal round: identical vector instruction sequence on the rotated
// rows; the scalar stream selects diagonals by register choice.
|  | add	v0.4s,v0.4s,v1.4s | 
|  | add	w5,w5,w10 | 
|  | add	v4.4s,v4.4s,v5.4s | 
|  | add	w6,w6,w11 | 
|  | add	v16.4s,v16.4s,v17.4s | 
|  | add	w7,w7,w12 | 
|  | eor	v3.16b,v3.16b,v0.16b | 
|  | add	w8,w8,w9 | 
|  | eor	v7.16b,v7.16b,v4.16b | 
|  | eor	w21,w21,w5 | 
|  | eor	v19.16b,v19.16b,v16.16b | 
|  | eor	w17,w17,w6 | 
|  | rev32	v3.8h,v3.8h | 
|  | eor	w19,w19,w7 | 
|  | rev32	v7.8h,v7.8h | 
|  | eor	w20,w20,w8 | 
|  | rev32	v19.8h,v19.8h | 
|  | ror	w21,w21,#16 | 
|  | add	v2.4s,v2.4s,v3.4s | 
|  | ror	w17,w17,#16 | 
|  | add	v6.4s,v6.4s,v7.4s | 
|  | ror	w19,w19,#16 | 
|  | add	v18.4s,v18.4s,v19.4s | 
|  | ror	w20,w20,#16 | 
|  | eor	v20.16b,v1.16b,v2.16b | 
|  | add	w15,w15,w21 | 
|  | eor	v21.16b,v5.16b,v6.16b | 
|  | add	w16,w16,w17 | 
|  | eor	v22.16b,v17.16b,v18.16b | 
|  | add	w13,w13,w19 | 
|  | ushr	v1.4s,v20.4s,#20 | 
|  | add	w14,w14,w20 | 
|  | ushr	v5.4s,v21.4s,#20 | 
|  | eor	w10,w10,w15 | 
|  | ushr	v17.4s,v22.4s,#20 | 
|  | eor	w11,w11,w16 | 
|  | sli	v1.4s,v20.4s,#12 | 
|  | eor	w12,w12,w13 | 
|  | sli	v5.4s,v21.4s,#12 | 
|  | eor	w9,w9,w14 | 
|  | sli	v17.4s,v22.4s,#12 | 
|  | ror	w10,w10,#20 | 
|  | add	v0.4s,v0.4s,v1.4s | 
|  | ror	w11,w11,#20 | 
|  | add	v4.4s,v4.4s,v5.4s | 
|  | ror	w12,w12,#20 | 
|  | add	v16.4s,v16.4s,v17.4s | 
|  | ror	w9,w9,#20 | 
|  | eor	v20.16b,v3.16b,v0.16b | 
|  | add	w5,w5,w10 | 
|  | eor	v21.16b,v7.16b,v4.16b | 
|  | add	w6,w6,w11 | 
|  | eor	v22.16b,v19.16b,v16.16b | 
|  | add	w7,w7,w12 | 
|  | ushr	v3.4s,v20.4s,#24 | 
|  | add	w8,w8,w9 | 
|  | ushr	v7.4s,v21.4s,#24 | 
|  | eor	w21,w21,w5 | 
|  | ushr	v19.4s,v22.4s,#24 | 
|  | eor	w17,w17,w6 | 
|  | sli	v3.4s,v20.4s,#8 | 
|  | eor	w19,w19,w7 | 
|  | sli	v7.4s,v21.4s,#8 | 
|  | eor	w20,w20,w8 | 
|  | sli	v19.4s,v22.4s,#8 | 
|  | ror	w21,w21,#24 | 
|  | add	v2.4s,v2.4s,v3.4s | 
|  | ror	w17,w17,#24 | 
|  | add	v6.4s,v6.4s,v7.4s | 
|  | ror	w19,w19,#24 | 
|  | add	v18.4s,v18.4s,v19.4s | 
|  | ror	w20,w20,#24 | 
|  | eor	v20.16b,v1.16b,v2.16b | 
|  | add	w15,w15,w21 | 
|  | eor	v21.16b,v5.16b,v6.16b | 
|  | add	w16,w16,w17 | 
|  | eor	v22.16b,v17.16b,v18.16b | 
|  | add	w13,w13,w19 | 
|  | ushr	v1.4s,v20.4s,#25 | 
|  | add	w14,w14,w20 | 
|  | ushr	v5.4s,v21.4s,#25 | 
|  | eor	w10,w10,w15 | 
|  | ushr	v17.4s,v22.4s,#25 | 
|  | eor	w11,w11,w16 | 
|  | sli	v1.4s,v20.4s,#7 | 
|  | eor	w12,w12,w13 | 
|  | sli	v5.4s,v21.4s,#7 | 
|  | eor	w9,w9,w14 | 
|  | sli	v17.4s,v22.4s,#7 | 
|  | ror	w10,w10,#25 | 
// Undo the row rotation (rows 1 and 3 now rotate by 12 and 4 bytes).
|  | ext	v2.16b,v2.16b,v2.16b,#8 | 
|  | ror	w11,w11,#25 | 
|  | ext	v6.16b,v6.16b,v6.16b,#8 | 
|  | ror	w12,w12,#25 | 
|  | ext	v18.16b,v18.16b,v18.16b,#8 | 
|  | ror	w9,w9,#25 | 
|  | ext	v3.16b,v3.16b,v3.16b,#4 | 
|  | ext	v7.16b,v7.16b,v7.16b,#4 | 
|  | ext	v19.16b,v19.16b,v19.16b,#4 | 
|  | ext	v1.16b,v1.16b,v1.16b,#12 | 
|  | ext	v5.16b,v5.16b,v5.16b,#12 | 
|  | ext	v17.16b,v17.16b,v17.16b,#12 | 
|  | cbnz	x4,Loop_neon | 
|  |  | 
// Add the input state back in: the scalar block from the packed registers,
// each NEON block from v24/v25/v26 plus its own counter row (v27/v28/v29).
|  | add	w5,w5,w22		// accumulate key block | 
|  | add	v0.4s,v0.4s,v24.4s | 
|  | add	x6,x6,x22,lsr#32 | 
|  | add	v4.4s,v4.4s,v24.4s | 
|  | add	w7,w7,w23 | 
|  | add	v16.4s,v16.4s,v24.4s | 
|  | add	x8,x8,x23,lsr#32 | 
|  | add	v2.4s,v2.4s,v26.4s | 
|  | add	w9,w9,w24 | 
|  | add	v6.4s,v6.4s,v26.4s | 
|  | add	x10,x10,x24,lsr#32 | 
|  | add	v18.4s,v18.4s,v26.4s | 
|  | add	w11,w11,w25 | 
|  | add	v3.4s,v3.4s,v27.4s | 
|  | add	x12,x12,x25,lsr#32 | 
|  | add	w13,w13,w26 | 
|  | add	v7.4s,v7.4s,v28.4s | 
|  | add	x14,x14,x26,lsr#32 | 
|  | add	w15,w15,w27 | 
|  | add	v19.4s,v19.4s,v29.4s | 
|  | add	x16,x16,x27,lsr#32 | 
|  | add	w17,w17,w28 | 
|  | add	v1.4s,v1.4s,v25.4s | 
|  | add	x19,x19,x28,lsr#32 | 
|  | add	w20,w20,w30 | 
|  | add	v5.4s,v5.4s,v25.4s | 
|  | add	x21,x21,x30,lsr#32 | 
|  | add	v17.4s,v17.4s,v25.4s | 
|  |  | 
// Flags are still those of "subs x2,x2,#256": lower means fewer than 256
// bytes were left.
|  | b.lo	Ltail_neon | 
|  |  | 
// Full 256 bytes. Output order is: scalar block, then the NEON blocks in
// v0-v3, v4-v7 and v16-v19. Input is pulled through x-registers for the
// first 64 bytes and through v20-v23 / v0-v3 for the rest.
|  | add	x5,x5,x6,lsl#32	// pack | 
|  | add	x7,x7,x8,lsl#32 | 
|  | ldp	x6,x8,[x1,#0]		// load input | 
|  | add	x9,x9,x10,lsl#32 | 
|  | add	x11,x11,x12,lsl#32 | 
|  | ldp	x10,x12,[x1,#16] | 
|  | add	x13,x13,x14,lsl#32 | 
|  | add	x15,x15,x16,lsl#32 | 
|  | ldp	x14,x16,[x1,#32] | 
|  | add	x17,x17,x19,lsl#32 | 
|  | add	x20,x20,x21,lsl#32 | 
|  | ldp	x19,x21,[x1,#48] | 
|  | add	x1,x1,#64 | 
|  | #ifdef	__AARCH64EB__ | 
|  | rev	x5,x5 | 
|  | rev	x7,x7 | 
|  | rev	x9,x9 | 
|  | rev	x11,x11 | 
|  | rev	x13,x13 | 
|  | rev	x15,x15 | 
|  | rev	x17,x17 | 
|  | rev	x20,x20 | 
|  | #endif | 
|  | ld1	{v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64 | 
|  | eor	x5,x5,x6 | 
|  | eor	x7,x7,x8 | 
|  | eor	x9,x9,x10 | 
|  | eor	x11,x11,x12 | 
|  | eor	x13,x13,x14 | 
|  | eor	v0.16b,v0.16b,v20.16b | 
|  | eor	x15,x15,x16 | 
|  | eor	v1.16b,v1.16b,v21.16b | 
|  | eor	x17,x17,x19 | 
|  | eor	v2.16b,v2.16b,v22.16b | 
|  | eor	x20,x20,x21 | 
|  | eor	v3.16b,v3.16b,v23.16b | 
|  | ld1	{v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64 | 
|  |  | 
// Advance all four block counters by 4 for the next outer iteration.
|  | stp	x5,x7,[x0,#0]		// store output | 
|  | add	x28,x28,#4			// increment counter | 
|  | stp	x9,x11,[x0,#16] | 
|  | add	v27.4s,v27.4s,v31.4s		// += 4 | 
|  | stp	x13,x15,[x0,#32] | 
|  | add	v28.4s,v28.4s,v31.4s | 
|  | stp	x17,x20,[x0,#48] | 
|  | add	v29.4s,v29.4s,v31.4s | 
|  | add	x0,x0,#64 | 
|  |  | 
// v0-v3 are stored and then immediately reused to hold the last 64 input
// bytes of this 256-byte group.
|  | st1	{v0.16b,v1.16b,v2.16b,v3.16b},[x0],#64 | 
|  | ld1	{v0.16b,v1.16b,v2.16b,v3.16b},[x1],#64 | 
|  |  | 
|  | eor	v4.16b,v4.16b,v20.16b | 
|  | eor	v5.16b,v5.16b,v21.16b | 
|  | eor	v6.16b,v6.16b,v22.16b | 
|  | eor	v7.16b,v7.16b,v23.16b | 
|  | st1	{v4.16b,v5.16b,v6.16b,v7.16b},[x0],#64 | 
|  |  | 
|  | eor	v16.16b,v16.16b,v0.16b | 
|  | eor	v17.16b,v17.16b,v1.16b | 
|  | eor	v18.16b,v18.16b,v2.16b | 
|  | eor	v19.16b,v19.16b,v3.16b | 
|  | st1	{v16.16b,v17.16b,v18.16b,v19.16b},[x0],#64 | 
|  |  | 
// Same subs flags: higher means bytes remain after this 256-byte group.
|  | b.hi	Loop_outer_neon | 
|  |  | 
|  | ldp	x19,x20,[x29,#16] | 
|  | add	sp,sp,#64 | 
|  | ldp	x21,x22,[x29,#32] | 
|  | ldp	x23,x24,[x29,#48] | 
|  | ldp	x25,x26,[x29,#64] | 
|  | ldp	x27,x28,[x29,#80] | 
|  | ldp	x29,x30,[sp],#96 | 
|  | AARCH64_VALIDATE_LINK_REGISTER | 
|  | ret | 
|  |  | 
// Fewer than 256 bytes remain. Undo the subs so x2 is the remaining byte
// count, then consume the four already-computed keystream blocks 64 bytes
// at a time. Below 64 bytes only the scalar block is needed, so the
// scalar routine's tail is reused.
|  | Ltail_neon: | 
|  | add	x2,x2,#256 | 
|  | cmp	x2,#64 | 
|  | b.lo	Less_than_64 | 
|  |  | 
|  | add	x5,x5,x6,lsl#32	// pack | 
|  | add	x7,x7,x8,lsl#32 | 
|  | ldp	x6,x8,[x1,#0]		// load input | 
|  | add	x9,x9,x10,lsl#32 | 
|  | add	x11,x11,x12,lsl#32 | 
|  | ldp	x10,x12,[x1,#16] | 
|  | add	x13,x13,x14,lsl#32 | 
|  | add	x15,x15,x16,lsl#32 | 
|  | ldp	x14,x16,[x1,#32] | 
|  | add	x17,x17,x19,lsl#32 | 
|  | add	x20,x20,x21,lsl#32 | 
|  | ldp	x19,x21,[x1,#48] | 
|  | add	x1,x1,#64 | 
|  | #ifdef	__AARCH64EB__ | 
|  | rev	x5,x5 | 
|  | rev	x7,x7 | 
|  | rev	x9,x9 | 
|  | rev	x11,x11 | 
|  | rev	x13,x13 | 
|  | rev	x15,x15 | 
|  | rev	x17,x17 | 
|  | rev	x20,x20 | 
|  | #endif | 
|  | eor	x5,x5,x6 | 
|  | eor	x7,x7,x8 | 
|  | eor	x9,x9,x10 | 
|  | eor	x11,x11,x12 | 
|  | eor	x13,x13,x14 | 
|  | eor	x15,x15,x16 | 
|  | eor	x17,x17,x19 | 
|  | eor	x20,x20,x21 | 
|  |  | 
|  | stp	x5,x7,[x0,#0]		// store output | 
|  | add	x28,x28,#4			// increment counter | 
|  | stp	x9,x11,[x0,#16] | 
|  | stp	x13,x15,[x0,#32] | 
|  | stp	x17,x20,[x0,#48] | 
|  | add	x0,x0,#64 | 
// The b.eq tests the flags of the preceding "cmp x2,#64" (nothing since
// then has written the flags): exactly 64 bytes were left, so we are done.
|  | b.eq	Ldone_neon | 
|  | sub	x2,x2,#64 | 
|  | cmp	x2,#64 | 
|  | b.lo	Less_than_128 | 
|  |  | 
|  | ld1	{v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64 | 
|  | eor	v0.16b,v0.16b,v20.16b | 
|  | eor	v1.16b,v1.16b,v21.16b | 
|  | eor	v2.16b,v2.16b,v22.16b | 
|  | eor	v3.16b,v3.16b,v23.16b | 
|  | st1	{v0.16b,v1.16b,v2.16b,v3.16b},[x0],#64 | 
|  | b.eq	Ldone_neon | 
|  | sub	x2,x2,#64 | 
|  | cmp	x2,#64 | 
|  | b.lo	Less_than_192 | 
|  |  | 
|  | ld1	{v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64 | 
|  | eor	v4.16b,v4.16b,v20.16b | 
|  | eor	v5.16b,v5.16b,v21.16b | 
|  | eor	v6.16b,v6.16b,v22.16b | 
|  | eor	v7.16b,v7.16b,v23.16b | 
|  | st1	{v4.16b,v5.16b,v6.16b,v7.16b},[x0],#64 | 
|  | b.eq	Ldone_neon | 
|  | sub	x2,x2,#64 | 
|  |  | 
// For the final partial block, spill whichever keystream block comes next
// (v16-v19, v0-v3 or v4-v7) to the scratch area and XOR byte by byte.
|  | st1	{v16.16b,v17.16b,v18.16b,v19.16b},[sp] | 
|  | b	Last_neon | 
|  |  | 
|  | Less_than_128: | 
|  | st1	{v0.16b,v1.16b,v2.16b,v3.16b},[sp] | 
|  | b	Last_neon | 
|  | Less_than_192: | 
|  | st1	{v4.16b,v5.16b,v6.16b,v7.16b},[sp] | 
|  | b	Last_neon | 
|  |  | 
|  | .align	4 | 
// Same negative-index byte loop as Less_than_64 / Loop_tail in the scalar
// routine: x2 (fewer than 64 here) is turned into a negative index that
// counts up to zero, and x0 is biased by -1 because the store follows the
// index increment.
|  | Last_neon: | 
|  | sub	x0,x0,#1 | 
|  | add	x1,x1,x2 | 
|  | add	x0,x0,x2 | 
|  | add	x4,sp,x2 | 
|  | neg	x2,x2 | 
|  |  | 
|  | Loop_tail_neon: | 
|  | ldrb	w10,[x1,x2] | 
|  | ldrb	w11,[x4,x2] | 
|  | add	x2,x2,#1 | 
|  | eor	w10,w10,w11 | 
|  | strb	w10,[x0,x2] | 
|  | cbnz	x2,Loop_tail_neon | 
|  |  | 
// Overwrite the keystream copy in the scratch area with zeros.
|  | stp	xzr,xzr,[sp,#0] | 
|  | stp	xzr,xzr,[sp,#16] | 
|  | stp	xzr,xzr,[sp,#32] | 
|  | stp	xzr,xzr,[sp,#48] | 
|  |  | 
|  | Ldone_neon: | 
|  | ldp	x19,x20,[x29,#16] | 
|  | add	sp,sp,#64 | 
|  | ldp	x21,x22,[x29,#32] | 
|  | ldp	x23,x24,[x29,#48] | 
|  | ldp	x25,x26,[x29,#64] | 
|  | ldp	x27,x28,[x29,#80] | 
|  | ldp	x29,x30,[sp],#96 | 
|  | AARCH64_VALIDATE_LINK_REGISTER | 
|  | ret | 
|  |  | 
|  |  | 
|  | .align	5 | 
|  | ChaCha20_512_neon: | 
|  | AARCH64_SIGN_LINK_REGISTER | 
|  | stp	x29,x30,[sp,#-96]! | 
|  | add	x29,sp,#0 | 
|  |  | 
|  | adrp	x5,Lsigma@PAGE | 
|  | add	x5,x5,Lsigma@PAGEOFF | 
|  | stp	x19,x20,[sp,#16] | 
|  | stp	x21,x22,[sp,#32] | 
|  | stp	x23,x24,[sp,#48] | 
|  | stp	x25,x26,[sp,#64] | 
|  | stp	x27,x28,[sp,#80] | 
|  |  | 
|  | L512_or_more_neon: | 
|  | sub	sp,sp,#128+64 | 
|  |  | 
|  | ldp	x22,x23,[x5]		// load sigma | 
|  | ld1	{v24.4s},[x5],#16 | 
|  | ldp	x24,x25,[x3]		// load key | 
|  | ldp	x26,x27,[x3,#16] | 
|  | ld1	{v25.4s,v26.4s},[x3] | 
|  | ldp	x28,x30,[x4]		// load counter | 
|  | ld1	{v27.4s},[x4] | 
|  | ld1	{v31.4s},[x5] | 
|  | #ifdef	__AARCH64EB__ | 
|  | rev64	v24.4s,v24.4s | 
|  | ror	x24,x24,#32 | 
|  | ror	x25,x25,#32 | 
|  | ror	x26,x26,#32 | 
|  | ror	x27,x27,#32 | 
|  | ror	x28,x28,#32 | 
|  | ror	x30,x30,#32 | 
|  | #endif | 
|  | add	v27.4s,v27.4s,v31.4s		// += 1 | 
|  | stp	q24,q25,[sp,#0]		// off-load key block, invariant part | 
|  | add	v27.4s,v27.4s,v31.4s		// not typo | 
|  | str	q26,[sp,#32] | 
|  | add	v28.4s,v27.4s,v31.4s | 
|  | add	v29.4s,v28.4s,v31.4s | 
|  | add	v30.4s,v29.4s,v31.4s | 
|  | shl	v31.4s,v31.4s,#2			// 1 -> 4 | 
|  |  | 
|  | stp	d8,d9,[sp,#128+0]		// meet ABI requirements | 
|  | stp	d10,d11,[sp,#128+16] | 
|  | stp	d12,d13,[sp,#128+32] | 
|  | stp	d14,d15,[sp,#128+48] | 
|  |  | 
|  | sub	x2,x2,#512			// not typo | 
|  |  | 
|  | Loop_outer_512_neon: | 
|  | mov	v0.16b,v24.16b | 
|  | mov	v4.16b,v24.16b | 
|  | mov	v8.16b,v24.16b | 
|  | mov	v12.16b,v24.16b | 
|  | mov	v16.16b,v24.16b | 
|  | mov	v20.16b,v24.16b | 
|  | mov	v1.16b,v25.16b | 
|  | mov	w5,w22			// unpack key block | 
|  | mov	v5.16b,v25.16b | 
|  | lsr	x6,x22,#32 | 
|  | mov	v9.16b,v25.16b | 
|  | mov	w7,w23 | 
|  | mov	v13.16b,v25.16b | 
|  | lsr	x8,x23,#32 | 
|  | mov	v17.16b,v25.16b | 
|  | mov	w9,w24 | 
|  | mov	v21.16b,v25.16b | 
|  | lsr	x10,x24,#32 | 
|  | mov	v3.16b,v27.16b | 
|  | mov	w11,w25 | 
|  | mov	v7.16b,v28.16b | 
|  | lsr	x12,x25,#32 | 
|  | mov	v11.16b,v29.16b | 
|  | mov	w13,w26 | 
|  | mov	v15.16b,v30.16b | 
|  | lsr	x14,x26,#32 | 
|  | mov	v2.16b,v26.16b | 
|  | mov	w15,w27 | 
|  | mov	v6.16b,v26.16b | 
|  | lsr	x16,x27,#32 | 
|  | add	v19.4s,v3.4s,v31.4s			// +4 | 
|  | mov	w17,w28 | 
|  | add	v23.4s,v7.4s,v31.4s			// +4 | 
|  | lsr	x19,x28,#32 | 
|  | mov	v10.16b,v26.16b | 
|  | mov	w20,w30 | 
|  | mov	v14.16b,v26.16b | 
|  | lsr	x21,x30,#32 | 
|  | mov	v18.16b,v26.16b | 
|  | stp	q27,q28,[sp,#48]		// off-load key block, variable part | 
|  | mov	v22.16b,v26.16b | 
|  | str	q29,[sp,#80] | 
|  |  | 
|  | mov	x4,#5 | 
|  | subs	x2,x2,#512 | 
|  | Loop_upper_neon: | 
|  | sub	x4,x4,#1 | 
|  | add	v0.4s,v0.4s,v1.4s | 
|  | add	w5,w5,w9 | 
|  | add	v4.4s,v4.4s,v5.4s | 
|  | add	w6,w6,w10 | 
|  | add	v8.4s,v8.4s,v9.4s | 
|  | add	w7,w7,w11 | 
|  | add	v12.4s,v12.4s,v13.4s | 
|  | add	w8,w8,w12 | 
|  | add	v16.4s,v16.4s,v17.4s | 
|  | eor	w17,w17,w5 | 
|  | add	v20.4s,v20.4s,v21.4s | 
|  | eor	w19,w19,w6 | 
|  | eor	v3.16b,v3.16b,v0.16b | 
|  | eor	w20,w20,w7 | 
|  | eor	v7.16b,v7.16b,v4.16b | 
|  | eor	w21,w21,w8 | 
|  | eor	v11.16b,v11.16b,v8.16b | 
|  | ror	w17,w17,#16 | 
|  | eor	v15.16b,v15.16b,v12.16b | 
|  | ror	w19,w19,#16 | 
|  | eor	v19.16b,v19.16b,v16.16b | 
|  | ror	w20,w20,#16 | 
|  | eor	v23.16b,v23.16b,v20.16b | 
|  | ror	w21,w21,#16 | 
|  | rev32	v3.8h,v3.8h | 
|  | add	w13,w13,w17 | 
|  | rev32	v7.8h,v7.8h | 
|  | add	w14,w14,w19 | 
|  | rev32	v11.8h,v11.8h | 
|  | add	w15,w15,w20 | 
|  | rev32	v15.8h,v15.8h | 
|  | add	w16,w16,w21 | 
|  | rev32	v19.8h,v19.8h | 
|  | eor	w9,w9,w13 | 
|  | rev32	v23.8h,v23.8h | 
|  | eor	w10,w10,w14 | 
|  | add	v2.4s,v2.4s,v3.4s | 
|  | eor	w11,w11,w15 | 
|  | add	v6.4s,v6.4s,v7.4s | 
|  | eor	w12,w12,w16 | 
|  | add	v10.4s,v10.4s,v11.4s | 
|  | ror	w9,w9,#20 | 
|  | add	v14.4s,v14.4s,v15.4s | 
|  | ror	w10,w10,#20 | 
|  | add	v18.4s,v18.4s,v19.4s | 
|  | ror	w11,w11,#20 | 
|  | add	v22.4s,v22.4s,v23.4s | 
|  | ror	w12,w12,#20 | 
|  | eor	v24.16b,v1.16b,v2.16b | 
|  | add	w5,w5,w9 | 
|  | eor	v25.16b,v5.16b,v6.16b | 
|  | add	w6,w6,w10 | 
|  | eor	v26.16b,v9.16b,v10.16b | 
|  | add	w7,w7,w11 | 
|  | eor	v27.16b,v13.16b,v14.16b | 
|  | add	w8,w8,w12 | 
|  | eor	v28.16b,v17.16b,v18.16b | 
|  | eor	w17,w17,w5 | 
|  | eor	v29.16b,v21.16b,v22.16b | 
|  | eor	w19,w19,w6 | 
|  | ushr	v1.4s,v24.4s,#20 | 
|  | eor	w20,w20,w7 | 
|  | ushr	v5.4s,v25.4s,#20 | 
|  | eor	w21,w21,w8 | 
|  | ushr	v9.4s,v26.4s,#20 | 
|  | ror	w17,w17,#24 | 
|  | ushr	v13.4s,v27.4s,#20 | 
|  | ror	w19,w19,#24 | 
|  | ushr	v17.4s,v28.4s,#20 | 
|  | ror	w20,w20,#24 | 
|  | ushr	v21.4s,v29.4s,#20 | 
|  | ror	w21,w21,#24 | 
|  | sli	v1.4s,v24.4s,#12 | 
|  | add	w13,w13,w17 | 
|  | sli	v5.4s,v25.4s,#12 | 
|  | add	w14,w14,w19 | 
|  | sli	v9.4s,v26.4s,#12 | 
|  | add	w15,w15,w20 | 
|  | sli	v13.4s,v27.4s,#12 | 
|  | add	w16,w16,w21 | 
|  | sli	v17.4s,v28.4s,#12 | 
|  | eor	w9,w9,w13 | 
|  | sli	v21.4s,v29.4s,#12 | 
|  | eor	w10,w10,w14 | 
|  | add	v0.4s,v0.4s,v1.4s | 
|  | eor	w11,w11,w15 | 
|  | add	v4.4s,v4.4s,v5.4s | 
|  | eor	w12,w12,w16 | 
|  | add	v8.4s,v8.4s,v9.4s | 
|  | ror	w9,w9,#25 | 
|  | add	v12.4s,v12.4s,v13.4s | 
|  | ror	w10,w10,#25 | 
|  | add	v16.4s,v16.4s,v17.4s | 
|  | ror	w11,w11,#25 | 
|  | add	v20.4s,v20.4s,v21.4s | 
|  | ror	w12,w12,#25 | 
|  | eor	v24.16b,v3.16b,v0.16b | 
|  | add	w5,w5,w10 | 
|  | eor	v25.16b,v7.16b,v4.16b | 
|  | add	w6,w6,w11 | 
|  | eor	v26.16b,v11.16b,v8.16b | 
|  | add	w7,w7,w12 | 
|  | eor	v27.16b,v15.16b,v12.16b | 
|  | add	w8,w8,w9 | 
|  | eor	v28.16b,v19.16b,v16.16b | 
|  | eor	w21,w21,w5 | 
|  | eor	v29.16b,v23.16b,v20.16b | 
|  | eor	w17,w17,w6 | 
|  | ushr	v3.4s,v24.4s,#24 | 
|  | eor	w19,w19,w7 | 
|  | ushr	v7.4s,v25.4s,#24 | 
|  | eor	w20,w20,w8 | 
|  | ushr	v11.4s,v26.4s,#24 | 
|  | ror	w21,w21,#16 | 
|  | ushr	v15.4s,v27.4s,#24 | 
|  | ror	w17,w17,#16 | 
|  | ushr	v19.4s,v28.4s,#24 | 
|  | ror	w19,w19,#16 | 
|  | ushr	v23.4s,v29.4s,#24 | 
|  | ror	w20,w20,#16 | 
|  | sli	v3.4s,v24.4s,#8 | 
|  | add	w15,w15,w21 | 
|  | sli	v7.4s,v25.4s,#8 | 
|  | add	w16,w16,w17 | 
|  | sli	v11.4s,v26.4s,#8 | 
|  | add	w13,w13,w19 | 
|  | sli	v15.4s,v27.4s,#8 | 
|  | add	w14,w14,w20 | 
|  | sli	v19.4s,v28.4s,#8 | 
|  | eor	w10,w10,w15 | 
|  | sli	v23.4s,v29.4s,#8 | 
|  | eor	w11,w11,w16 | 
|  | add	v2.4s,v2.4s,v3.4s | 
|  | eor	w12,w12,w13 | 
|  | add	v6.4s,v6.4s,v7.4s | 
|  | eor	w9,w9,w14 | 
|  | add	v10.4s,v10.4s,v11.4s | 
|  | ror	w10,w10,#20 | 
|  | add	v14.4s,v14.4s,v15.4s | 
|  | ror	w11,w11,#20 | 
|  | add	v18.4s,v18.4s,v19.4s | 
|  | ror	w12,w12,#20 | 
|  | add	v22.4s,v22.4s,v23.4s | 
|  | ror	w9,w9,#20 | 
|  | eor	v24.16b,v1.16b,v2.16b | 
|  | add	w5,w5,w10 | 
|  | eor	v25.16b,v5.16b,v6.16b | 
|  | add	w6,w6,w11 | 
|  | eor	v26.16b,v9.16b,v10.16b | 
|  | add	w7,w7,w12 | 
|  | eor	v27.16b,v13.16b,v14.16b | 
|  | add	w8,w8,w9 | 
|  | eor	v28.16b,v17.16b,v18.16b | 
|  | eor	w21,w21,w5 | 
|  | eor	v29.16b,v21.16b,v22.16b | 
|  | eor	w17,w17,w6 | 
|  | ushr	v1.4s,v24.4s,#25 | 
|  | eor	w19,w19,w7 | 
|  | ushr	v5.4s,v25.4s,#25 | 
|  | eor	w20,w20,w8 | 
|  | ushr	v9.4s,v26.4s,#25 | 
|  | ror	w21,w21,#24 | 
|  | ushr	v13.4s,v27.4s,#25 | 
|  | ror	w17,w17,#24 | 
|  | ushr	v17.4s,v28.4s,#25 | 
|  | ror	w19,w19,#24 | 
|  | ushr	v21.4s,v29.4s,#25 | 
|  | ror	w20,w20,#24 | 
|  | sli	v1.4s,v24.4s,#7 | 
|  | add	w15,w15,w21 | 
|  | sli	v5.4s,v25.4s,#7 | 
|  | add	w16,w16,w17 | 
|  | sli	v9.4s,v26.4s,#7 | 
|  | add	w13,w13,w19 | 
|  | sli	v13.4s,v27.4s,#7 | 
|  | add	w14,w14,w20 | 
|  | sli	v17.4s,v28.4s,#7 | 
|  | eor	w10,w10,w15 | 
|  | sli	v21.4s,v29.4s,#7 | 
|  | eor	w11,w11,w16 | 
|  | ext	v2.16b,v2.16b,v2.16b,#8 | 
|  | eor	w12,w12,w13 | 
|  | ext	v6.16b,v6.16b,v6.16b,#8 | 
|  | eor	w9,w9,w14 | 
|  | ext	v10.16b,v10.16b,v10.16b,#8 | 
|  | ror	w10,w10,#25 | 
|  | ext	v14.16b,v14.16b,v14.16b,#8 | 
|  | ror	w11,w11,#25 | 
|  | ext	v18.16b,v18.16b,v18.16b,#8 | 
|  | ror	w12,w12,#25 | 
|  | ext	v22.16b,v22.16b,v22.16b,#8 | 
|  | ror	w9,w9,#25 | 
|  | ext	v3.16b,v3.16b,v3.16b,#12 | 
|  | ext	v7.16b,v7.16b,v7.16b,#12 | 
|  | ext	v11.16b,v11.16b,v11.16b,#12 | 
|  | ext	v15.16b,v15.16b,v15.16b,#12 | 
|  | ext	v19.16b,v19.16b,v19.16b,#12 | 
|  | ext	v23.16b,v23.16b,v23.16b,#12 | 
|  | ext	v1.16b,v1.16b,v1.16b,#4 | 
|  | ext	v5.16b,v5.16b,v5.16b,#4 | 
|  | ext	v9.16b,v9.16b,v9.16b,#4 | 
|  | ext	v13.16b,v13.16b,v13.16b,#4 | 
|  | ext	v17.16b,v17.16b,v17.16b,#4 | 
|  | ext	v21.16b,v21.16b,v21.16b,#4 | 
|  | add	v0.4s,v0.4s,v1.4s | 
|  | add	w5,w5,w9 | 
|  | add	v4.4s,v4.4s,v5.4s | 
|  | add	w6,w6,w10 | 
|  | add	v8.4s,v8.4s,v9.4s | 
|  | add	w7,w7,w11 | 
|  | add	v12.4s,v12.4s,v13.4s | 
|  | add	w8,w8,w12 | 
|  | add	v16.4s,v16.4s,v17.4s | 
|  | eor	w17,w17,w5 | 
|  | add	v20.4s,v20.4s,v21.4s | 
|  | eor	w19,w19,w6 | 
|  | eor	v3.16b,v3.16b,v0.16b | 
|  | eor	w20,w20,w7 | 
|  | eor	v7.16b,v7.16b,v4.16b | 
|  | eor	w21,w21,w8 | 
|  | eor	v11.16b,v11.16b,v8.16b | 
|  | ror	w17,w17,#16 | 
|  | eor	v15.16b,v15.16b,v12.16b | 
|  | ror	w19,w19,#16 | 
|  | eor	v19.16b,v19.16b,v16.16b | 
|  | ror	w20,w20,#16 | 
|  | eor	v23.16b,v23.16b,v20.16b | 
|  | ror	w21,w21,#16 | 
|  | rev32	v3.8h,v3.8h | 
|  | add	w13,w13,w17 | 
|  | rev32	v7.8h,v7.8h | 
|  | add	w14,w14,w19 | 
|  | rev32	v11.8h,v11.8h | 
|  | add	w15,w15,w20 | 
|  | rev32	v15.8h,v15.8h | 
|  | add	w16,w16,w21 | 
|  | rev32	v19.8h,v19.8h | 
|  | eor	w9,w9,w13 | 
|  | rev32	v23.8h,v23.8h | 
|  | eor	w10,w10,w14 | 
|  | add	v2.4s,v2.4s,v3.4s | 
|  | eor	w11,w11,w15 | 
|  | add	v6.4s,v6.4s,v7.4s | 
|  | eor	w12,w12,w16 | 
|  | add	v10.4s,v10.4s,v11.4s | 
|  | ror	w9,w9,#20 | 
|  | add	v14.4s,v14.4s,v15.4s | 
|  | ror	w10,w10,#20 | 
|  | add	v18.4s,v18.4s,v19.4s | 
|  | ror	w11,w11,#20 | 
|  | add	v22.4s,v22.4s,v23.4s | 
|  | ror	w12,w12,#20 | 
|  | eor	v24.16b,v1.16b,v2.16b | 
|  | add	w5,w5,w9 | 
|  | eor	v25.16b,v5.16b,v6.16b | 
|  | add	w6,w6,w10 | 
|  | eor	v26.16b,v9.16b,v10.16b | 
|  | add	w7,w7,w11 | 
|  | eor	v27.16b,v13.16b,v14.16b | 
|  | add	w8,w8,w12 | 
|  | eor	v28.16b,v17.16b,v18.16b | 
|  | eor	w17,w17,w5 | 
|  | eor	v29.16b,v21.16b,v22.16b | 
|  | eor	w19,w19,w6 | 
|  | ushr	v1.4s,v24.4s,#20 | 
|  | eor	w20,w20,w7 | 
|  | ushr	v5.4s,v25.4s,#20 | 
|  | eor	w21,w21,w8 | 
|  | ushr	v9.4s,v26.4s,#20 | 
|  | ror	w17,w17,#24 | 
|  | ushr	v13.4s,v27.4s,#20 | 
|  | ror	w19,w19,#24 | 
|  | ushr	v17.4s,v28.4s,#20 | 
|  | ror	w20,w20,#24 | 
|  | ushr	v21.4s,v29.4s,#20 | 
|  | ror	w21,w21,#24 | 
|  | sli	v1.4s,v24.4s,#12 | 
|  | add	w13,w13,w17 | 
|  | sli	v5.4s,v25.4s,#12 | 
|  | add	w14,w14,w19 | 
|  | sli	v9.4s,v26.4s,#12 | 
|  | add	w15,w15,w20 | 
|  | sli	v13.4s,v27.4s,#12 | 
|  | add	w16,w16,w21 | 
|  | sli	v17.4s,v28.4s,#12 | 
|  | eor	w9,w9,w13 | 
|  | sli	v21.4s,v29.4s,#12 | 
|  | eor	w10,w10,w14 | 
|  | add	v0.4s,v0.4s,v1.4s | 
|  | eor	w11,w11,w15 | 
|  | add	v4.4s,v4.4s,v5.4s | 
|  | eor	w12,w12,w16 | 
|  | add	v8.4s,v8.4s,v9.4s | 
|  | ror	w9,w9,#25 | 
|  | add	v12.4s,v12.4s,v13.4s | 
|  | ror	w10,w10,#25 | 
|  | add	v16.4s,v16.4s,v17.4s | 
|  | ror	w11,w11,#25 | 
|  | add	v20.4s,v20.4s,v21.4s | 
|  | ror	w12,w12,#25 | 
|  | eor	v24.16b,v3.16b,v0.16b | 
|  | add	w5,w5,w10 | 
|  | eor	v25.16b,v7.16b,v4.16b | 
|  | add	w6,w6,w11 | 
|  | eor	v26.16b,v11.16b,v8.16b | 
|  | add	w7,w7,w12 | 
|  | eor	v27.16b,v15.16b,v12.16b | 
|  | add	w8,w8,w9 | 
|  | eor	v28.16b,v19.16b,v16.16b | 
|  | eor	w21,w21,w5 | 
|  | eor	v29.16b,v23.16b,v20.16b | 
|  | eor	w17,w17,w6 | 
|  | ushr	v3.4s,v24.4s,#24 | 
|  | eor	w19,w19,w7 | 
|  | ushr	v7.4s,v25.4s,#24 | 
|  | eor	w20,w20,w8 | 
|  | ushr	v11.4s,v26.4s,#24 | 
|  | ror	w21,w21,#16 | 
|  | ushr	v15.4s,v27.4s,#24 | 
|  | ror	w17,w17,#16 | 
|  | ushr	v19.4s,v28.4s,#24 | 
|  | ror	w19,w19,#16 | 
|  | ushr	v23.4s,v29.4s,#24 | 
|  | ror	w20,w20,#16 | 
|  | sli	v3.4s,v24.4s,#8 | 
|  | add	w15,w15,w21 | 
|  | sli	v7.4s,v25.4s,#8 | 
|  | add	w16,w16,w17 | 
|  | sli	v11.4s,v26.4s,#8 | 
|  | add	w13,w13,w19 | 
|  | sli	v15.4s,v27.4s,#8 | 
|  | add	w14,w14,w20 | 
|  | sli	v19.4s,v28.4s,#8 | 
|  | eor	w10,w10,w15 | 
|  | sli	v23.4s,v29.4s,#8 | 
|  | eor	w11,w11,w16 | 
|  | add	v2.4s,v2.4s,v3.4s | 
|  | eor	w12,w12,w13 | 
|  | add	v6.4s,v6.4s,v7.4s | 
|  | eor	w9,w9,w14 | 
|  | add	v10.4s,v10.4s,v11.4s | 
|  | ror	w10,w10,#20 | 
|  | add	v14.4s,v14.4s,v15.4s | 
|  | ror	w11,w11,#20 | 
|  | add	v18.4s,v18.4s,v19.4s | 
|  | ror	w12,w12,#20 | 
|  | add	v22.4s,v22.4s,v23.4s | 
|  | ror	w9,w9,#20 | 
|  | eor	v24.16b,v1.16b,v2.16b | 
|  | add	w5,w5,w10 | 
|  | eor	v25.16b,v5.16b,v6.16b | 
|  | add	w6,w6,w11 | 
|  | eor	v26.16b,v9.16b,v10.16b | 
|  | add	w7,w7,w12 | 
|  | eor	v27.16b,v13.16b,v14.16b | 
|  | add	w8,w8,w9 | 
|  | eor	v28.16b,v17.16b,v18.16b | 
|  | eor	w21,w21,w5 | 
|  | eor	v29.16b,v21.16b,v22.16b | 
|  | eor	w17,w17,w6 | 
|  | ushr	v1.4s,v24.4s,#25 | 
|  | eor	w19,w19,w7 | 
|  | ushr	v5.4s,v25.4s,#25 | 
|  | eor	w20,w20,w8 | 
|  | ushr	v9.4s,v26.4s,#25 | 
|  | ror	w21,w21,#24 | 
|  | ushr	v13.4s,v27.4s,#25 | 
|  | ror	w17,w17,#24 | 
|  | ushr	v17.4s,v28.4s,#25 | 
|  | ror	w19,w19,#24 | 
|  | ushr	v21.4s,v29.4s,#25 | 
|  | ror	w20,w20,#24 | 
|  | sli	v1.4s,v24.4s,#7 | 
|  | add	w15,w15,w21 | 
|  | sli	v5.4s,v25.4s,#7 | 
|  | add	w16,w16,w17 | 
|  | sli	v9.4s,v26.4s,#7 | 
|  | add	w13,w13,w19 | 
|  | sli	v13.4s,v27.4s,#7 | 
|  | add	w14,w14,w20 | 
|  | sli	v17.4s,v28.4s,#7 | 
|  | eor	w10,w10,w15 | 
|  | sli	v21.4s,v29.4s,#7 | 
|  | eor	w11,w11,w16 | 
|  | ext	v2.16b,v2.16b,v2.16b,#8 | 
|  | eor	w12,w12,w13 | 
|  | ext	v6.16b,v6.16b,v6.16b,#8 | 
|  | eor	w9,w9,w14 | 
|  | ext	v10.16b,v10.16b,v10.16b,#8 | 
|  | ror	w10,w10,#25 | 
|  | ext	v14.16b,v14.16b,v14.16b,#8 | 
|  | ror	w11,w11,#25 | 
|  | ext	v18.16b,v18.16b,v18.16b,#8 | 
|  | ror	w12,w12,#25 | 
|  | ext	v22.16b,v22.16b,v22.16b,#8 | 
|  | ror	w9,w9,#25 | 
|  | ext	v3.16b,v3.16b,v3.16b,#4 | 
|  | ext	v7.16b,v7.16b,v7.16b,#4 | 
|  | ext	v11.16b,v11.16b,v11.16b,#4 | 
|  | ext	v15.16b,v15.16b,v15.16b,#4 | 
|  | ext	v19.16b,v19.16b,v19.16b,#4 | 
|  | ext	v23.16b,v23.16b,v23.16b,#4 | 
|  | ext	v1.16b,v1.16b,v1.16b,#12 | 
|  | ext	v5.16b,v5.16b,v5.16b,#12 | 
|  | ext	v9.16b,v9.16b,v9.16b,#12 | 
|  | ext	v13.16b,v13.16b,v13.16b,#12 | 
|  | ext	v17.16b,v17.16b,v17.16b,#12 | 
|  | ext	v21.16b,v21.16b,v21.16b,#12 | 
|  | cbnz	x4,Loop_upper_neon | 
|  |  | 
|  | add	w5,w5,w22		// accumulate key block | 
|  | add	x6,x6,x22,lsr#32 | 
|  | add	w7,w7,w23 | 
|  | add	x8,x8,x23,lsr#32 | 
|  | add	w9,w9,w24 | 
|  | add	x10,x10,x24,lsr#32 | 
|  | add	w11,w11,w25 | 
|  | add	x12,x12,x25,lsr#32 | 
|  | add	w13,w13,w26 | 
|  | add	x14,x14,x26,lsr#32 | 
|  | add	w15,w15,w27 | 
|  | add	x16,x16,x27,lsr#32 | 
|  | add	w17,w17,w28 | 
|  | add	x19,x19,x28,lsr#32 | 
|  | add	w20,w20,w30 | 
|  | add	x21,x21,x30,lsr#32 | 
|  |  | 
|  | add	x5,x5,x6,lsl#32	// pack | 
|  | add	x7,x7,x8,lsl#32 | 
|  | ldp	x6,x8,[x1,#0]		// load input | 
|  | add	x9,x9,x10,lsl#32 | 
|  | add	x11,x11,x12,lsl#32 | 
|  | ldp	x10,x12,[x1,#16] | 
|  | add	x13,x13,x14,lsl#32 | 
|  | add	x15,x15,x16,lsl#32 | 
|  | ldp	x14,x16,[x1,#32] | 
|  | add	x17,x17,x19,lsl#32 | 
|  | add	x20,x20,x21,lsl#32 | 
|  | ldp	x19,x21,[x1,#48] | 
|  | add	x1,x1,#64 | 
|  | #ifdef	__AARCH64EB__ | 
|  | rev	x5,x5 | 
|  | rev	x7,x7 | 
|  | rev	x9,x9 | 
|  | rev	x11,x11 | 
|  | rev	x13,x13 | 
|  | rev	x15,x15 | 
|  | rev	x17,x17 | 
|  | rev	x20,x20 | 
|  | #endif | 
|  | eor	x5,x5,x6 | 
|  | eor	x7,x7,x8 | 
|  | eor	x9,x9,x10 | 
|  | eor	x11,x11,x12 | 
|  | eor	x13,x13,x14 | 
|  | eor	x15,x15,x16 | 
|  | eor	x17,x17,x19 | 
|  | eor	x20,x20,x21 | 
|  |  | 
|  | stp	x5,x7,[x0,#0]		// store output | 
|  | add	x28,x28,#1			// increment counter | 
|  | mov	w5,w22			// unpack key block | 
|  | lsr	x6,x22,#32 | 
|  | stp	x9,x11,[x0,#16] | 
|  | mov	w7,w23 | 
|  | lsr	x8,x23,#32 | 
|  | stp	x13,x15,[x0,#32] | 
|  | mov	w9,w24 | 
|  | lsr	x10,x24,#32 | 
|  | stp	x17,x20,[x0,#48] | 
|  | add	x0,x0,#64 | 
|  | mov	w11,w25 | 
|  | lsr	x12,x25,#32 | 
|  | mov	w13,w26 | 
|  | lsr	x14,x26,#32 | 
|  | mov	w15,w27 | 
|  | lsr	x16,x27,#32 | 
|  | mov	w17,w28 | 
|  | lsr	x19,x28,#32 | 
|  | mov	w20,w30 | 
|  | lsr	x21,x30,#32 | 
|  |  | 
|  | mov	x4,#5 | 
|  | Loop_lower_neon: | 
|  | sub	x4,x4,#1 | 
|  | add	v0.4s,v0.4s,v1.4s | 
|  | add	w5,w5,w9 | 
|  | add	v4.4s,v4.4s,v5.4s | 
|  | add	w6,w6,w10 | 
|  | add	v8.4s,v8.4s,v9.4s | 
|  | add	w7,w7,w11 | 
|  | add	v12.4s,v12.4s,v13.4s | 
|  | add	w8,w8,w12 | 
|  | add	v16.4s,v16.4s,v17.4s | 
|  | eor	w17,w17,w5 | 
|  | add	v20.4s,v20.4s,v21.4s | 
|  | eor	w19,w19,w6 | 
|  | eor	v3.16b,v3.16b,v0.16b | 
|  | eor	w20,w20,w7 | 
|  | eor	v7.16b,v7.16b,v4.16b | 
|  | eor	w21,w21,w8 | 
|  | eor	v11.16b,v11.16b,v8.16b | 
|  | ror	w17,w17,#16 | 
|  | eor	v15.16b,v15.16b,v12.16b | 
|  | ror	w19,w19,#16 | 
|  | eor	v19.16b,v19.16b,v16.16b | 
|  | ror	w20,w20,#16 | 
|  | eor	v23.16b,v23.16b,v20.16b | 
|  | ror	w21,w21,#16 | 
|  | rev32	v3.8h,v3.8h | 
|  | add	w13,w13,w17 | 
|  | rev32	v7.8h,v7.8h | 
|  | add	w14,w14,w19 | 
|  | rev32	v11.8h,v11.8h | 
|  | add	w15,w15,w20 | 
|  | rev32	v15.8h,v15.8h | 
|  | add	w16,w16,w21 | 
|  | rev32	v19.8h,v19.8h | 
|  | eor	w9,w9,w13 | 
|  | rev32	v23.8h,v23.8h | 
|  | eor	w10,w10,w14 | 
|  | add	v2.4s,v2.4s,v3.4s | 
|  | eor	w11,w11,w15 | 
|  | add	v6.4s,v6.4s,v7.4s | 
|  | eor	w12,w12,w16 | 
|  | add	v10.4s,v10.4s,v11.4s | 
|  | ror	w9,w9,#20 | 
|  | add	v14.4s,v14.4s,v15.4s | 
|  | ror	w10,w10,#20 | 
|  | add	v18.4s,v18.4s,v19.4s | 
|  | ror	w11,w11,#20 | 
|  | add	v22.4s,v22.4s,v23.4s | 
|  | ror	w12,w12,#20 | 
|  | eor	v24.16b,v1.16b,v2.16b | 
|  | add	w5,w5,w9 | 
|  | eor	v25.16b,v5.16b,v6.16b | 
|  | add	w6,w6,w10 | 
|  | eor	v26.16b,v9.16b,v10.16b | 
|  | add	w7,w7,w11 | 
|  | eor	v27.16b,v13.16b,v14.16b | 
|  | add	w8,w8,w12 | 
|  | eor	v28.16b,v17.16b,v18.16b | 
|  | eor	w17,w17,w5 | 
|  | eor	v29.16b,v21.16b,v22.16b | 
|  | eor	w19,w19,w6 | 
|  | ushr	v1.4s,v24.4s,#20 | 
|  | eor	w20,w20,w7 | 
|  | ushr	v5.4s,v25.4s,#20 | 
|  | eor	w21,w21,w8 | 
|  | ushr	v9.4s,v26.4s,#20 | 
|  | ror	w17,w17,#24 | 
|  | ushr	v13.4s,v27.4s,#20 | 
|  | ror	w19,w19,#24 | 
|  | ushr	v17.4s,v28.4s,#20 | 
|  | ror	w20,w20,#24 | 
|  | ushr	v21.4s,v29.4s,#20 | 
|  | ror	w21,w21,#24 | 
|  | sli	v1.4s,v24.4s,#12 | 
|  | add	w13,w13,w17 | 
|  | sli	v5.4s,v25.4s,#12 | 
|  | add	w14,w14,w19 | 
|  | sli	v9.4s,v26.4s,#12 | 
|  | add	w15,w15,w20 | 
|  | sli	v13.4s,v27.4s,#12 | 
|  | add	w16,w16,w21 | 
|  | sli	v17.4s,v28.4s,#12 | 
|  | eor	w9,w9,w13 | 
|  | sli	v21.4s,v29.4s,#12 | 
|  | eor	w10,w10,w14 | 
|  | add	v0.4s,v0.4s,v1.4s | 
|  | eor	w11,w11,w15 | 
|  | add	v4.4s,v4.4s,v5.4s | 
|  | eor	w12,w12,w16 | 
|  | add	v8.4s,v8.4s,v9.4s | 
|  | ror	w9,w9,#25 | 
|  | add	v12.4s,v12.4s,v13.4s | 
|  | ror	w10,w10,#25 | 
|  | add	v16.4s,v16.4s,v17.4s | 
|  | ror	w11,w11,#25 | 
|  | add	v20.4s,v20.4s,v21.4s | 
|  | ror	w12,w12,#25 | 
|  | eor	v24.16b,v3.16b,v0.16b | 
|  | add	w5,w5,w10 | 
|  | eor	v25.16b,v7.16b,v4.16b | 
|  | add	w6,w6,w11 | 
|  | eor	v26.16b,v11.16b,v8.16b | 
|  | add	w7,w7,w12 | 
|  | eor	v27.16b,v15.16b,v12.16b | 
|  | add	w8,w8,w9 | 
|  | eor	v28.16b,v19.16b,v16.16b | 
|  | eor	w21,w21,w5 | 
|  | eor	v29.16b,v23.16b,v20.16b | 
|  | eor	w17,w17,w6 | 
|  | ushr	v3.4s,v24.4s,#24 | 
|  | eor	w19,w19,w7 | 
|  | ushr	v7.4s,v25.4s,#24 | 
|  | eor	w20,w20,w8 | 
|  | ushr	v11.4s,v26.4s,#24 | 
|  | ror	w21,w21,#16 | 
|  | ushr	v15.4s,v27.4s,#24 | 
|  | ror	w17,w17,#16 | 
|  | ushr	v19.4s,v28.4s,#24 | 
|  | ror	w19,w19,#16 | 
|  | ushr	v23.4s,v29.4s,#24 | 
|  | ror	w20,w20,#16 | 
|  | sli	v3.4s,v24.4s,#8 | 
|  | add	w15,w15,w21 | 
|  | sli	v7.4s,v25.4s,#8 | 
|  | add	w16,w16,w17 | 
|  | sli	v11.4s,v26.4s,#8 | 
|  | add	w13,w13,w19 | 
|  | sli	v15.4s,v27.4s,#8 | 
|  | add	w14,w14,w20 | 
|  | sli	v19.4s,v28.4s,#8 | 
|  | eor	w10,w10,w15 | 
|  | sli	v23.4s,v29.4s,#8 | 
|  | eor	w11,w11,w16 | 
|  | add	v2.4s,v2.4s,v3.4s | 
|  | eor	w12,w12,w13 | 
|  | add	v6.4s,v6.4s,v7.4s | 
|  | eor	w9,w9,w14 | 
|  | add	v10.4s,v10.4s,v11.4s | 
|  | ror	w10,w10,#20 | 
|  | add	v14.4s,v14.4s,v15.4s | 
|  | ror	w11,w11,#20 | 
|  | add	v18.4s,v18.4s,v19.4s | 
|  | ror	w12,w12,#20 | 
|  | add	v22.4s,v22.4s,v23.4s | 
|  | ror	w9,w9,#20 | 
|  | eor	v24.16b,v1.16b,v2.16b | 
|  | add	w5,w5,w10 | 
|  | eor	v25.16b,v5.16b,v6.16b | 
|  | add	w6,w6,w11 | 
|  | eor	v26.16b,v9.16b,v10.16b | 
|  | add	w7,w7,w12 | 
|  | eor	v27.16b,v13.16b,v14.16b | 
|  | add	w8,w8,w9 | 
|  | eor	v28.16b,v17.16b,v18.16b | 
|  | eor	w21,w21,w5 | 
|  | eor	v29.16b,v21.16b,v22.16b | 
|  | eor	w17,w17,w6 | 
|  | ushr	v1.4s,v24.4s,#25 | 
|  | eor	w19,w19,w7 | 
|  | ushr	v5.4s,v25.4s,#25 | 
|  | eor	w20,w20,w8 | 
|  | ushr	v9.4s,v26.4s,#25 | 
|  | ror	w21,w21,#24 | 
|  | ushr	v13.4s,v27.4s,#25 | 
|  | ror	w17,w17,#24 | 
|  | ushr	v17.4s,v28.4s,#25 | 
|  | ror	w19,w19,#24 | 
|  | ushr	v21.4s,v29.4s,#25 | 
|  | ror	w20,w20,#24 | 
|  | sli	v1.4s,v24.4s,#7 | 
|  | add	w15,w15,w21 | 
|  | sli	v5.4s,v25.4s,#7 | 
|  | add	w16,w16,w17 | 
|  | sli	v9.4s,v26.4s,#7 | 
|  | add	w13,w13,w19 | 
|  | sli	v13.4s,v27.4s,#7 | 
|  | add	w14,w14,w20 | 
|  | sli	v17.4s,v28.4s,#7 | 
|  | eor	w10,w10,w15 | 
|  | sli	v21.4s,v29.4s,#7 | 
|  | eor	w11,w11,w16 | 
|  | ext	v2.16b,v2.16b,v2.16b,#8 | 
|  | eor	w12,w12,w13 | 
|  | ext	v6.16b,v6.16b,v6.16b,#8 | 
|  | eor	w9,w9,w14 | 
|  | ext	v10.16b,v10.16b,v10.16b,#8 | 
|  | ror	w10,w10,#25 | 
|  | ext	v14.16b,v14.16b,v14.16b,#8 | 
|  | ror	w11,w11,#25 | 
|  | ext	v18.16b,v18.16b,v18.16b,#8 | 
|  | ror	w12,w12,#25 | 
|  | ext	v22.16b,v22.16b,v22.16b,#8 | 
|  | ror	w9,w9,#25 | 
|  | ext	v3.16b,v3.16b,v3.16b,#12 | 
|  | ext	v7.16b,v7.16b,v7.16b,#12 | 
|  | ext	v11.16b,v11.16b,v11.16b,#12 | 
|  | ext	v15.16b,v15.16b,v15.16b,#12 | 
|  | ext	v19.16b,v19.16b,v19.16b,#12 | 
|  | ext	v23.16b,v23.16b,v23.16b,#12 | 
|  | ext	v1.16b,v1.16b,v1.16b,#4 | 
|  | ext	v5.16b,v5.16b,v5.16b,#4 | 
|  | ext	v9.16b,v9.16b,v9.16b,#4 | 
|  | ext	v13.16b,v13.16b,v13.16b,#4 | 
|  | ext	v17.16b,v17.16b,v17.16b,#4 | 
|  | ext	v21.16b,v21.16b,v21.16b,#4 | 
|  | add	v0.4s,v0.4s,v1.4s | 
|  | add	w5,w5,w9 | 
|  | add	v4.4s,v4.4s,v5.4s | 
|  | add	w6,w6,w10 | 
|  | add	v8.4s,v8.4s,v9.4s | 
|  | add	w7,w7,w11 | 
|  | add	v12.4s,v12.4s,v13.4s | 
|  | add	w8,w8,w12 | 
|  | add	v16.4s,v16.4s,v17.4s | 
|  | eor	w17,w17,w5 | 
|  | add	v20.4s,v20.4s,v21.4s | 
|  | eor	w19,w19,w6 | 
|  | eor	v3.16b,v3.16b,v0.16b | 
|  | eor	w20,w20,w7 | 
|  | eor	v7.16b,v7.16b,v4.16b | 
|  | eor	w21,w21,w8 | 
|  | eor	v11.16b,v11.16b,v8.16b | 
|  | ror	w17,w17,#16 | 
|  | eor	v15.16b,v15.16b,v12.16b | 
|  | ror	w19,w19,#16 | 
|  | eor	v19.16b,v19.16b,v16.16b | 
|  | ror	w20,w20,#16 | 
|  | eor	v23.16b,v23.16b,v20.16b | 
|  | ror	w21,w21,#16 | 
|  | rev32	v3.8h,v3.8h | 
|  | add	w13,w13,w17 | 
|  | rev32	v7.8h,v7.8h | 
|  | add	w14,w14,w19 | 
|  | rev32	v11.8h,v11.8h | 
|  | add	w15,w15,w20 | 
|  | rev32	v15.8h,v15.8h | 
|  | add	w16,w16,w21 | 
|  | rev32	v19.8h,v19.8h | 
|  | eor	w9,w9,w13 | 
|  | rev32	v23.8h,v23.8h | 
|  | eor	w10,w10,w14 | 
|  | add	v2.4s,v2.4s,v3.4s | 
|  | eor	w11,w11,w15 | 
|  | add	v6.4s,v6.4s,v7.4s | 
|  | eor	w12,w12,w16 | 
|  | add	v10.4s,v10.4s,v11.4s | 
|  | ror	w9,w9,#20 | 
|  | add	v14.4s,v14.4s,v15.4s | 
|  | ror	w10,w10,#20 | 
|  | add	v18.4s,v18.4s,v19.4s | 
|  | ror	w11,w11,#20 | 
|  | add	v22.4s,v22.4s,v23.4s | 
|  | ror	w12,w12,#20 | 
|  | eor	v24.16b,v1.16b,v2.16b | 
|  | add	w5,w5,w9 | 
|  | eor	v25.16b,v5.16b,v6.16b | 
|  | add	w6,w6,w10 | 
|  | eor	v26.16b,v9.16b,v10.16b | 
|  | add	w7,w7,w11 | 
|  | eor	v27.16b,v13.16b,v14.16b | 
|  | add	w8,w8,w12 | 
|  | eor	v28.16b,v17.16b,v18.16b | 
|  | eor	w17,w17,w5 | 
|  | eor	v29.16b,v21.16b,v22.16b | 
|  | eor	w19,w19,w6 | 
|  | ushr	v1.4s,v24.4s,#20 | 
|  | eor	w20,w20,w7 | 
|  | ushr	v5.4s,v25.4s,#20 | 
|  | eor	w21,w21,w8 | 
|  | ushr	v9.4s,v26.4s,#20 | 
|  | ror	w17,w17,#24 | 
|  | ushr	v13.4s,v27.4s,#20 | 
|  | ror	w19,w19,#24 | 
|  | ushr	v17.4s,v28.4s,#20 | 
|  | ror	w20,w20,#24 | 
|  | ushr	v21.4s,v29.4s,#20 | 
|  | ror	w21,w21,#24 | 
|  | sli	v1.4s,v24.4s,#12 | 
|  | add	w13,w13,w17 | 
|  | sli	v5.4s,v25.4s,#12 | 
|  | add	w14,w14,w19 | 
|  | sli	v9.4s,v26.4s,#12 | 
|  | add	w15,w15,w20 | 
|  | sli	v13.4s,v27.4s,#12 | 
|  | add	w16,w16,w21 | 
|  | sli	v17.4s,v28.4s,#12 | 
|  | eor	w9,w9,w13 | 
|  | sli	v21.4s,v29.4s,#12 | 
|  | eor	w10,w10,w14 | 
|  | add	v0.4s,v0.4s,v1.4s | 
|  | eor	w11,w11,w15 | 
|  | add	v4.4s,v4.4s,v5.4s | 
|  | eor	w12,w12,w16 | 
|  | add	v8.4s,v8.4s,v9.4s | 
|  | ror	w9,w9,#25 | 
|  | add	v12.4s,v12.4s,v13.4s | 
|  | ror	w10,w10,#25 | 
|  | add	v16.4s,v16.4s,v17.4s | 
|  | ror	w11,w11,#25 | 
|  | add	v20.4s,v20.4s,v21.4s | 
|  | ror	w12,w12,#25 | 
|  | eor	v24.16b,v3.16b,v0.16b | 
|  | add	w5,w5,w10 | 
|  | eor	v25.16b,v7.16b,v4.16b | 
|  | add	w6,w6,w11 | 
|  | eor	v26.16b,v11.16b,v8.16b | 
|  | add	w7,w7,w12 | 
|  | eor	v27.16b,v15.16b,v12.16b | 
|  | add	w8,w8,w9 | 
|  | eor	v28.16b,v19.16b,v16.16b | 
|  | eor	w21,w21,w5 | 
|  | eor	v29.16b,v23.16b,v20.16b | 
|  | eor	w17,w17,w6 | 
|  | ushr	v3.4s,v24.4s,#24 | 
|  | eor	w19,w19,w7 | 
|  | ushr	v7.4s,v25.4s,#24 | 
|  | eor	w20,w20,w8 | 
|  | ushr	v11.4s,v26.4s,#24 | 
|  | ror	w21,w21,#16 | 
|  | ushr	v15.4s,v27.4s,#24 | 
|  | ror	w17,w17,#16 | 
|  | ushr	v19.4s,v28.4s,#24 | 
|  | ror	w19,w19,#16 | 
|  | ushr	v23.4s,v29.4s,#24 | 
|  | ror	w20,w20,#16 | 
|  | sli	v3.4s,v24.4s,#8 | 
|  | add	w15,w15,w21 | 
|  | sli	v7.4s,v25.4s,#8 | 
|  | add	w16,w16,w17 | 
|  | sli	v11.4s,v26.4s,#8 | 
|  | add	w13,w13,w19 | 
|  | sli	v15.4s,v27.4s,#8 | 
|  | add	w14,w14,w20 | 
|  | sli	v19.4s,v28.4s,#8 | 
|  | eor	w10,w10,w15 | 
|  | sli	v23.4s,v29.4s,#8 | 
|  | eor	w11,w11,w16 | 
|  | add	v2.4s,v2.4s,v3.4s | 
|  | eor	w12,w12,w13 | 
|  | add	v6.4s,v6.4s,v7.4s | 
|  | eor	w9,w9,w14 | 
|  | add	v10.4s,v10.4s,v11.4s | 
|  | ror	w10,w10,#20 | 
|  | add	v14.4s,v14.4s,v15.4s | 
|  | ror	w11,w11,#20 | 
|  | add	v18.4s,v18.4s,v19.4s | 
|  | ror	w12,w12,#20 | 
|  | add	v22.4s,v22.4s,v23.4s | 
|  | ror	w9,w9,#20 | 
|  | eor	v24.16b,v1.16b,v2.16b | 
|  | add	w5,w5,w10 | 
|  | eor	v25.16b,v5.16b,v6.16b | 
|  | add	w6,w6,w11 | 
|  | eor	v26.16b,v9.16b,v10.16b | 
|  | add	w7,w7,w12 | 
|  | eor	v27.16b,v13.16b,v14.16b | 
|  | add	w8,w8,w9 | 
|  | eor	v28.16b,v17.16b,v18.16b | 
|  | eor	w21,w21,w5 | 
|  | eor	v29.16b,v21.16b,v22.16b | 
|  | eor	w17,w17,w6 | 
|  | ushr	v1.4s,v24.4s,#25 | 
|  | eor	w19,w19,w7 | 
|  | ushr	v5.4s,v25.4s,#25 | 
|  | eor	w20,w20,w8 | 
|  | ushr	v9.4s,v26.4s,#25 | 
|  | ror	w21,w21,#24 | 
|  | ushr	v13.4s,v27.4s,#25 | 
|  | ror	w17,w17,#24 | 
|  | ushr	v17.4s,v28.4s,#25 | 
|  | ror	w19,w19,#24 | 
|  | ushr	v21.4s,v29.4s,#25 | 
|  | ror	w20,w20,#24 | 
|  | sli	v1.4s,v24.4s,#7 | 
|  | add	w15,w15,w21 | 
|  | sli	v5.4s,v25.4s,#7 | 
|  | add	w16,w16,w17 | 
|  | sli	v9.4s,v26.4s,#7 | 
|  | add	w13,w13,w19 | 
|  | sli	v13.4s,v27.4s,#7 | 
|  | add	w14,w14,w20 | 
|  | sli	v17.4s,v28.4s,#7 | 
|  | eor	w10,w10,w15 | 
|  | sli	v21.4s,v29.4s,#7 | 
|  | eor	w11,w11,w16 | 
|  | ext	v2.16b,v2.16b,v2.16b,#8 | 
|  | eor	w12,w12,w13 | 
|  | ext	v6.16b,v6.16b,v6.16b,#8 | 
|  | eor	w9,w9,w14 | 
|  | ext	v10.16b,v10.16b,v10.16b,#8 | 
|  | ror	w10,w10,#25 | 
|  | ext	v14.16b,v14.16b,v14.16b,#8 | 
|  | ror	w11,w11,#25 | 
|  | ext	v18.16b,v18.16b,v18.16b,#8 | 
|  | ror	w12,w12,#25 | 
|  | ext	v22.16b,v22.16b,v22.16b,#8 | 
|  | ror	w9,w9,#25 | 
|  | ext	v3.16b,v3.16b,v3.16b,#4 | 
|  | ext	v7.16b,v7.16b,v7.16b,#4 | 
|  | ext	v11.16b,v11.16b,v11.16b,#4 | 
|  | ext	v15.16b,v15.16b,v15.16b,#4 | 
|  | ext	v19.16b,v19.16b,v19.16b,#4 | 
|  | ext	v23.16b,v23.16b,v23.16b,#4 | 
|  | ext	v1.16b,v1.16b,v1.16b,#12 | 
|  | ext	v5.16b,v5.16b,v5.16b,#12 | 
|  | ext	v9.16b,v9.16b,v9.16b,#12 | 
|  | ext	v13.16b,v13.16b,v13.16b,#12 | 
|  | ext	v17.16b,v17.16b,v17.16b,#12 | 
|  | ext	v21.16b,v21.16b,v21.16b,#12 | 
|  | cbnz	x4,Loop_lower_neon | 
|  |  | 
|  | add	w5,w5,w22		// accumulate key block | 
|  | ldp	q24,q25,[sp,#0] | 
|  | add	x6,x6,x22,lsr#32 | 
|  | ldp	q26,q27,[sp,#32] | 
|  | add	w7,w7,w23 | 
|  | ldp	q28,q29,[sp,#64] | 
|  | add	x8,x8,x23,lsr#32 | 
|  | add	v0.4s,v0.4s,v24.4s | 
|  | add	w9,w9,w24 | 
|  | add	v4.4s,v4.4s,v24.4s | 
|  | add	x10,x10,x24,lsr#32 | 
|  | add	v8.4s,v8.4s,v24.4s | 
|  | add	w11,w11,w25 | 
|  | add	v12.4s,v12.4s,v24.4s | 
|  | add	x12,x12,x25,lsr#32 | 
|  | add	v16.4s,v16.4s,v24.4s | 
|  | add	w13,w13,w26 | 
|  | add	v20.4s,v20.4s,v24.4s | 
|  | add	x14,x14,x26,lsr#32 | 
|  | add	v2.4s,v2.4s,v26.4s | 
|  | add	w15,w15,w27 | 
|  | add	v6.4s,v6.4s,v26.4s | 
|  | add	x16,x16,x27,lsr#32 | 
|  | add	v10.4s,v10.4s,v26.4s | 
|  | add	w17,w17,w28 | 
|  | add	v14.4s,v14.4s,v26.4s | 
|  | add	x19,x19,x28,lsr#32 | 
|  | add	v18.4s,v18.4s,v26.4s | 
|  | add	w20,w20,w30 | 
|  | add	v22.4s,v22.4s,v26.4s | 
|  | add	x21,x21,x30,lsr#32 | 
|  | add	v19.4s,v19.4s,v31.4s			// +4 | 
|  | add	x5,x5,x6,lsl#32	// pack | 
|  | add	v23.4s,v23.4s,v31.4s			// +4 | 
|  | add	x7,x7,x8,lsl#32 | 
|  | add	v3.4s,v3.4s,v27.4s | 
|  | ldp	x6,x8,[x1,#0]		// load input | 
|  | add	v7.4s,v7.4s,v28.4s | 
|  | add	x9,x9,x10,lsl#32 | 
|  | add	v11.4s,v11.4s,v29.4s | 
|  | add	x11,x11,x12,lsl#32 | 
|  | add	v15.4s,v15.4s,v30.4s | 
|  | ldp	x10,x12,[x1,#16] | 
|  | add	v19.4s,v19.4s,v27.4s | 
|  | add	x13,x13,x14,lsl#32 | 
|  | add	v23.4s,v23.4s,v28.4s | 
|  | add	x15,x15,x16,lsl#32 | 
|  | add	v1.4s,v1.4s,v25.4s | 
|  | ldp	x14,x16,[x1,#32] | 
|  | add	v5.4s,v5.4s,v25.4s | 
|  | add	x17,x17,x19,lsl#32 | 
|  | add	v9.4s,v9.4s,v25.4s | 
|  | add	x20,x20,x21,lsl#32 | 
|  | add	v13.4s,v13.4s,v25.4s | 
|  | ldp	x19,x21,[x1,#48] | 
|  | add	v17.4s,v17.4s,v25.4s | 
|  | add	x1,x1,#64 | 
|  | add	v21.4s,v21.4s,v25.4s | 
|  |  | 
|  | #ifdef	__AARCH64EB__ | 
|  | rev	x5,x5 | 
|  | rev	x7,x7 | 
|  | rev	x9,x9 | 
|  | rev	x11,x11 | 
|  | rev	x13,x13 | 
|  | rev	x15,x15 | 
|  | rev	x17,x17 | 
|  | rev	x20,x20 | 
|  | #endif | 
|  | ld1	{v24.16b,v25.16b,v26.16b,v27.16b},[x1],#64 | 
|  | eor	x5,x5,x6 | 
|  | eor	x7,x7,x8 | 
|  | eor	x9,x9,x10 | 
|  | eor	x11,x11,x12 | 
|  | eor	x13,x13,x14 | 
|  | eor	v0.16b,v0.16b,v24.16b | 
|  | eor	x15,x15,x16 | 
|  | eor	v1.16b,v1.16b,v25.16b | 
|  | eor	x17,x17,x19 | 
|  | eor	v2.16b,v2.16b,v26.16b | 
|  | eor	x20,x20,x21 | 
|  | eor	v3.16b,v3.16b,v27.16b | 
|  | ld1	{v24.16b,v25.16b,v26.16b,v27.16b},[x1],#64 | 
|  |  | 
|  | stp	x5,x7,[x0,#0]		// store output | 
|  | add	x28,x28,#7			// increment counter | 
|  | stp	x9,x11,[x0,#16] | 
|  | stp	x13,x15,[x0,#32] | 
|  | stp	x17,x20,[x0,#48] | 
|  | add	x0,x0,#64 | 
|  | st1	{v0.16b,v1.16b,v2.16b,v3.16b},[x0],#64 | 
|  |  | 
|  | ld1	{v0.16b,v1.16b,v2.16b,v3.16b},[x1],#64 | 
|  | eor	v4.16b,v4.16b,v24.16b | 
|  | eor	v5.16b,v5.16b,v25.16b | 
|  | eor	v6.16b,v6.16b,v26.16b | 
|  | eor	v7.16b,v7.16b,v27.16b | 
|  | st1	{v4.16b,v5.16b,v6.16b,v7.16b},[x0],#64 | 
|  |  | 
|  | ld1	{v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64 | 
|  | eor	v8.16b,v8.16b,v0.16b | 
|  | ldp	q24,q25,[sp,#0] | 
|  | eor	v9.16b,v9.16b,v1.16b | 
|  | ldp	q26,q27,[sp,#32] | 
|  | eor	v10.16b,v10.16b,v2.16b | 
|  | eor	v11.16b,v11.16b,v3.16b | 
|  | st1	{v8.16b,v9.16b,v10.16b,v11.16b},[x0],#64 | 
|  |  | 
|  | ld1	{v8.16b,v9.16b,v10.16b,v11.16b},[x1],#64 | 
|  | eor	v12.16b,v12.16b,v4.16b | 
|  | eor	v13.16b,v13.16b,v5.16b | 
|  | eor	v14.16b,v14.16b,v6.16b | 
|  | eor	v15.16b,v15.16b,v7.16b | 
|  | st1	{v12.16b,v13.16b,v14.16b,v15.16b},[x0],#64 | 
|  |  | 
|  | ld1	{v12.16b,v13.16b,v14.16b,v15.16b},[x1],#64 | 
|  | eor	v16.16b,v16.16b,v8.16b | 
|  | eor	v17.16b,v17.16b,v9.16b | 
|  | eor	v18.16b,v18.16b,v10.16b | 
|  | eor	v19.16b,v19.16b,v11.16b | 
|  | st1	{v16.16b,v17.16b,v18.16b,v19.16b},[x0],#64 | 
|  |  | 
|  | shl	v0.4s,v31.4s,#1			// 4 -> 8 | 
|  | eor	v20.16b,v20.16b,v12.16b | 
|  | eor	v21.16b,v21.16b,v13.16b | 
|  | eor	v22.16b,v22.16b,v14.16b | 
|  | eor	v23.16b,v23.16b,v15.16b | 
|  | st1	{v20.16b,v21.16b,v22.16b,v23.16b},[x0],#64 | 
|  |  | 
|  | add	v27.4s,v27.4s,v0.4s			// += 8 | 
|  | add	v28.4s,v28.4s,v0.4s | 
|  | add	v29.4s,v29.4s,v0.4s | 
|  | add	v30.4s,v30.4s,v0.4s | 
|  |  | 
|  | b.hs	Loop_outer_512_neon | 
|  |  | 
|  | adds	x2,x2,#512 | 
|  | ushr	v0.4s,v31.4s,#2			// 4 -> 1 | 
|  |  | 
|  | ldp	d8,d9,[sp,#128+0]		// meet ABI requirements | 
|  | ldp	d10,d11,[sp,#128+16] | 
|  | ldp	d12,d13,[sp,#128+32] | 
|  | ldp	d14,d15,[sp,#128+48] | 
|  |  | 
|  | stp	q24,q31,[sp,#0]		// wipe off-load area | 
|  | stp	q24,q31,[sp,#32] | 
|  | stp	q24,q31,[sp,#64] | 
|  |  | 
|  | b.eq	Ldone_512_neon | 
|  |  | 
|  | cmp	x2,#192 | 
|  | sub	v27.4s,v27.4s,v0.4s			// -= 1 | 
|  | sub	v28.4s,v28.4s,v0.4s | 
|  | sub	v29.4s,v29.4s,v0.4s | 
|  | add	sp,sp,#128 | 
|  | b.hs	Loop_outer_neon | 
|  |  | 
|  | eor	v25.16b,v25.16b,v25.16b | 
|  | eor	v26.16b,v26.16b,v26.16b | 
|  | eor	v27.16b,v27.16b,v27.16b | 
|  | eor	v28.16b,v28.16b,v28.16b | 
|  | eor	v29.16b,v29.16b,v29.16b | 
|  | eor	v30.16b,v30.16b,v30.16b | 
|  | b	Loop_outer | 
|  |  | 
// Ldone_512_neon: exit path reached from the `b.eq Ldone_512_neon` above, which
// is taken when `adds x2,x2,#512` leaves x2 == 0. d8-d15 have already been
// reloaded before that branch, so only general-purpose registers are restored here.
|  | Ldone_512_neon: | 
// x19-x28 are reloaded through x29-relative addresses, so these loads are
// independent of the sp adjustment interleaved between them.
|  | ldp	x19,x20,[x29,#16] | 
// Release 128+64 bytes of stack. Presumably these are the scratch areas set up
// by the routine's prologue, which is outside this view -- TODO confirm.
|  | add	sp,sp,#128+64 | 
|  | ldp	x21,x22,[x29,#32] | 
|  | ldp	x23,x24,[x29,#48] | 
|  | ldp	x25,x26,[x29,#64] | 
|  | ldp	x27,x28,[x29,#80] | 
// Reload x29/x30 from [sp], then post-increment sp by 96 to drop the frame.
|  | ldp	x29,x30,[sp],#96 | 
// NOTE(review): macro is supplied by <openssl/asm_base.h>; presumably the
// counterpart of AARCH64_SIGN_LINK_REGISTER that checks x30 before the return
// -- confirm against that header.
|  | AARCH64_VALIDATE_LINK_REGISTER | 
|  | ret | 
|  |  | 
|  | #endif  // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__APPLE__) |