// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#include <openssl/asm_base.h>

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__APPLE__)
#include <openssl/arm_arch.h>

#if __ARM_MAX_ARCH__>=7
.text

.section	__TEXT,__const
.align	5
Lrcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b
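// A reading of this table, as consumed by the key-setup loops below: the
// first row is the initial round constant 0x01, the second row is the tbl
// byte-permutation mask used to rotate-and-splat the last word of a vector
// (the "rotate-n-splat" above), and the third row supplies the 0x1b round
// constant that follows 0x80 and cannot be produced by the shl doubling
// used inside the loops.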

.text

.globl	_aes_hw_set_encrypt_key
.private_extern	_aes_hw_set_encrypt_key

.align	5
_aes_hw_set_encrypt_key:
Lenc_key:
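// Key-expansion entry point, also reached via "bl Lenc_key" from
// _aes_hw_set_decrypt_key below. As used by the code that follows:
// x0 = user key, w1 = key length in bits (128/192/256), x2 = output key
// schedule (round count stored at its end). x3 carries the return value
// (-2 for a bad length, 0 on success) until the epilogue copies it to x0.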
// Armv8.3-A PAuth: even though x30 is pushed to the stack, it is not popped later.
AARCH64_VALID_CALL_TARGET
stp	x29,x30,[sp,#-16]!
add	x29,sp,#0
mov	x3,#-2
cmp	w1,#128
b.lt	Lenc_key_abort
cmp	w1,#256
b.gt	Lenc_key_abort
tst	w1,#0x3f
b.ne	Lenc_key_abort

adrp	x3,Lrcon@PAGE
add	x3,x3,Lrcon@PAGEOFF
cmp	w1,#192

eor	v0.16b,v0.16b,v0.16b
ld1	{v3.16b},[x0],#16
mov	w1,#8		// reuse w1
ld1	{v1.4s,v2.4s},[x3],#32

b.lt	Loop128
b.eq	L192
b	L256

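// 128-bit schedule: each Loop128 iteration derives one round key. The tbl
// rotate-and-splats the last word of v3, aese against the all-zero v0
// performs SubBytes (ShiftRows is a no-op on four identical words), v1
// holds the running round constant, and the ext/eor chain xors in the
// preceding words of the key.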
.align	4
Loop128:
tbl	v6.16b,{v3.16b},v2.16b
ext	v5.16b,v0.16b,v3.16b,#12
st1	{v3.4s},[x2],#16
aese	v6.16b,v0.16b
subs	w1,w1,#1

eor	v3.16b,v3.16b,v5.16b
ext	v5.16b,v0.16b,v5.16b,#12
eor	v3.16b,v3.16b,v5.16b
ext	v5.16b,v0.16b,v5.16b,#12
eor	v6.16b,v6.16b,v1.16b
eor	v3.16b,v3.16b,v5.16b
shl	v1.16b,v1.16b,#1
eor	v3.16b,v3.16b,v6.16b
b.ne	Loop128

ld1	{v1.4s},[x3]

tbl	v6.16b,{v3.16b},v2.16b
ext	v5.16b,v0.16b,v3.16b,#12
st1	{v3.4s},[x2],#16
aese	v6.16b,v0.16b

eor	v3.16b,v3.16b,v5.16b
ext	v5.16b,v0.16b,v5.16b,#12
eor	v3.16b,v3.16b,v5.16b
ext	v5.16b,v0.16b,v5.16b,#12
eor	v6.16b,v6.16b,v1.16b
eor	v3.16b,v3.16b,v5.16b
shl	v1.16b,v1.16b,#1
eor	v3.16b,v3.16b,v6.16b

tbl	v6.16b,{v3.16b},v2.16b
ext	v5.16b,v0.16b,v3.16b,#12
st1	{v3.4s},[x2],#16
aese	v6.16b,v0.16b

eor	v3.16b,v3.16b,v5.16b
ext	v5.16b,v0.16b,v5.16b,#12
eor	v3.16b,v3.16b,v5.16b
ext	v5.16b,v0.16b,v5.16b,#12
eor	v6.16b,v6.16b,v1.16b
eor	v3.16b,v3.16b,v5.16b
eor	v3.16b,v3.16b,v6.16b
st1	{v3.4s},[x2]
add	x2,x2,#0x50

mov	w12,#10
b	Ldone

.align	4
L192:
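// 192-bit schedule: the remaining 8 key bytes go into v4, and the tbl mask
// is lowered by 8 (movi/sub below) so the rotate-and-splat picks up the
// last valid word of v4. Eight passes of Loop192 fill out the 13 round
// keys for 12 rounds.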
ld1	{v4.8b},[x0],#8
movi	v6.16b,#8			// borrow v6.16b
st1	{v3.4s},[x2],#16
sub	v2.16b,v2.16b,v6.16b	// adjust the mask

Loop192:
tbl	v6.16b,{v4.16b},v2.16b
ext	v5.16b,v0.16b,v3.16b,#12
st1	{v4.8b},[x2],#8
aese	v6.16b,v0.16b
subs	w1,w1,#1

eor	v3.16b,v3.16b,v5.16b
ext	v5.16b,v0.16b,v5.16b,#12
eor	v3.16b,v3.16b,v5.16b
ext	v5.16b,v0.16b,v5.16b,#12
eor	v3.16b,v3.16b,v5.16b

dup	v5.4s,v3.s[3]
eor	v5.16b,v5.16b,v4.16b
eor	v6.16b,v6.16b,v1.16b
ext	v4.16b,v0.16b,v4.16b,#12
shl	v1.16b,v1.16b,#1
eor	v4.16b,v4.16b,v5.16b
eor	v3.16b,v3.16b,v6.16b
eor	v4.16b,v4.16b,v6.16b
st1	{v3.4s},[x2],#16
b.ne	Loop192

mov	w12,#12
add	x2,x2,#0x20
b	Ldone

.align	4
L256:
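// 256-bit schedule: the second half of the key is loaded into v4, and seven
// passes of Loop256 produce the 15 round keys for 14 rounds, alternating
// between expanding v3 (rotate-and-splat plus the round constant) and v4
// (splat-only via dup/aese, with no rotate and no round constant).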
ld1	{v4.16b},[x0]
mov	w1,#7
mov	w12,#14
st1	{v3.4s},[x2],#16

Loop256:
tbl	v6.16b,{v4.16b},v2.16b
ext	v5.16b,v0.16b,v3.16b,#12
st1	{v4.4s},[x2],#16
aese	v6.16b,v0.16b
subs	w1,w1,#1

eor	v3.16b,v3.16b,v5.16b
ext	v5.16b,v0.16b,v5.16b,#12
eor	v3.16b,v3.16b,v5.16b
ext	v5.16b,v0.16b,v5.16b,#12
eor	v6.16b,v6.16b,v1.16b
eor	v3.16b,v3.16b,v5.16b
shl	v1.16b,v1.16b,#1
eor	v3.16b,v3.16b,v6.16b
st1	{v3.4s},[x2],#16
b.eq	Ldone

dup	v6.4s,v3.s[3]		// just splat
ext	v5.16b,v0.16b,v4.16b,#12
aese	v6.16b,v0.16b

eor	v4.16b,v4.16b,v5.16b
ext	v5.16b,v0.16b,v5.16b,#12
eor	v4.16b,v4.16b,v5.16b
ext	v5.16b,v0.16b,v5.16b,#12
eor	v4.16b,v4.16b,v5.16b

eor	v4.16b,v4.16b,v6.16b
b	Loop256

Ldone:
str	w12,[x2]
mov	x3,#0

Lenc_key_abort:
mov	x0,x3			// return value
ldr	x29,[sp],#16
ret


.globl	_aes_hw_set_decrypt_key
.private_extern	_aes_hw_set_decrypt_key

.align	5
_aes_hw_set_decrypt_key:
AARCH64_SIGN_LINK_REGISTER
stp	x29,x30,[sp,#-16]!
add	x29,sp,#0
bl	Lenc_key

cmp	x0,#0
b.ne	Ldec_key_abort

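// The decryption schedule is derived from the encryption schedule built by
// Lenc_key: the round keys are swapped end for end, and aesimc
// (InvMixColumns) is applied to every key except the outermost pair, as the
// equivalent inverse cipher requires.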
sub	x2,x2,#240		// restore original x2
mov	x4,#-16
add	x0,x2,x12,lsl#4	// end of key schedule

ld1	{v0.4s},[x2]
ld1	{v1.4s},[x0]
st1	{v0.4s},[x0],x4
st1	{v1.4s},[x2],#16

Loop_imc:
ld1	{v0.4s},[x2]
ld1	{v1.4s},[x0]
aesimc	v0.16b,v0.16b
aesimc	v1.16b,v1.16b
st1	{v0.4s},[x0],x4
st1	{v1.4s},[x2],#16
cmp	x0,x2
b.hi	Loop_imc

ld1	{v0.4s},[x2]
aesimc	v0.16b,v0.16b
st1	{v0.4s},[x0]

eor	x0,x0,x0		// return value
Ldec_key_abort:
ldp	x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret

.globl	_aes_hw_encrypt
.private_extern	_aes_hw_encrypt

.align	5
_aes_hw_encrypt:
AARCH64_VALID_CALL_TARGET
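// Single-block encryption: x0 = input block, x1 = output block, x2 = key
// schedule (round count at offset 240). Loop_enc runs two rounds per
// iteration; the final round omits aesmc and xors in the last round key.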
ldr	w3,[x2,#240]
ld1	{v0.4s},[x2],#16
ld1	{v2.16b},[x0]
sub	w3,w3,#2
ld1	{v1.4s},[x2],#16

Loop_enc:
aese	v2.16b,v0.16b
aesmc	v2.16b,v2.16b
ld1	{v0.4s},[x2],#16
subs	w3,w3,#2
aese	v2.16b,v1.16b
aesmc	v2.16b,v2.16b
ld1	{v1.4s},[x2],#16
b.gt	Loop_enc

aese	v2.16b,v0.16b
aesmc	v2.16b,v2.16b
ld1	{v0.4s},[x2]
aese	v2.16b,v1.16b
eor	v2.16b,v2.16b,v0.16b

st1	{v2.16b},[x1]
ret

.globl	_aes_hw_decrypt
.private_extern	_aes_hw_decrypt

.align	5
_aes_hw_decrypt:
AARCH64_VALID_CALL_TARGET
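// Single-block decryption: same register usage and structure as
// _aes_hw_encrypt above, with aesd/aesimc in place of aese/aesmc.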
ldr	w3,[x2,#240]
ld1	{v0.4s},[x2],#16
ld1	{v2.16b},[x0]
sub	w3,w3,#2
ld1	{v1.4s},[x2],#16

Loop_dec:
aesd	v2.16b,v0.16b
aesimc	v2.16b,v2.16b
ld1	{v0.4s},[x2],#16
subs	w3,w3,#2
aesd	v2.16b,v1.16b
aesimc	v2.16b,v2.16b
ld1	{v1.4s},[x2],#16
b.gt	Loop_dec

aesd	v2.16b,v0.16b
aesimc	v2.16b,v2.16b
ld1	{v0.4s},[x2]
aesd	v2.16b,v1.16b
eor	v2.16b,v2.16b,v0.16b

st1	{v2.16b},[x1]
ret

.globl	_aes_hw_cbc_encrypt
.private_extern	_aes_hw_cbc_encrypt

.align	5
_aes_hw_cbc_encrypt:
// Armv8.3-A PAuth: even though x30 is pushed to the stack, it is not popped later.
AARCH64_VALID_CALL_TARGET
stp	x29,x30,[sp,#-16]!
add	x29,sp,#0
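// As laid out by the loads and stores below: x0 = input, x1 = output,
// x2 = length in bytes, x3 = key schedule, x4 = IV (updated on return),
// w5 = non-zero to encrypt, zero to decrypt. Lengths below 16 bytes abort;
// otherwise the length is rounded down to a whole number of blocks.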
subs	x2,x2,#16
mov	x8,#16
b.lo	Lcbc_abort
csel	x8,xzr,x8,eq

cmp	w5,#0			// en- or decrypting?
ldr	w5,[x3,#240]
and	x2,x2,#-16
ld1	{v6.16b},[x4]
ld1	{v0.16b},[x0],x8

ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
sub	w5,w5,#6
add	x7,x3,x5,lsl#4	// pointer to last 7 round keys
sub	w5,w5,#2
ld1	{v18.4s,v19.4s},[x7],#32
ld1	{v20.4s,v21.4s},[x7],#32
ld1	{v22.4s,v23.4s},[x7],#32
ld1	{v7.4s},[x7]

add	x7,x3,#32
mov	w6,w5
b.eq	Lcbc_dec

cmp	w5,#2
eor	v0.16b,v0.16b,v6.16b
eor	v5.16b,v16.16b,v7.16b
b.eq	Lcbc_enc128

ld1	{v2.4s,v3.4s},[x7]
add	x7,x3,#16
add	x6,x3,#16*4
add	x12,x3,#16*5
aese	v0.16b,v16.16b
aesmc	v0.16b,v0.16b
add	x14,x3,#16*6
add	x3,x3,#16*7
b	Lenter_cbc_enc

.align	4
Loop_cbc_enc:
aese	v0.16b,v16.16b
aesmc	v0.16b,v0.16b
st1	{v6.16b},[x1],#16
Lenter_cbc_enc:
aese	v0.16b,v17.16b
aesmc	v0.16b,v0.16b
aese	v0.16b,v2.16b
aesmc	v0.16b,v0.16b
ld1	{v16.4s},[x6]
cmp	w5,#4
aese	v0.16b,v3.16b
aesmc	v0.16b,v0.16b
ld1	{v17.4s},[x12]
b.eq	Lcbc_enc192

aese	v0.16b,v16.16b
aesmc	v0.16b,v0.16b
ld1	{v16.4s},[x14]
aese	v0.16b,v17.16b
aesmc	v0.16b,v0.16b
ld1	{v17.4s},[x3]
nop

Lcbc_enc192:
aese	v0.16b,v16.16b
aesmc	v0.16b,v0.16b
subs	x2,x2,#16
aese	v0.16b,v17.16b
aesmc	v0.16b,v0.16b
csel	x8,xzr,x8,eq
aese	v0.16b,v18.16b
aesmc	v0.16b,v0.16b
aese	v0.16b,v19.16b
aesmc	v0.16b,v0.16b
ld1	{v16.16b},[x0],x8
aese	v0.16b,v20.16b
aesmc	v0.16b,v0.16b
eor	v16.16b,v16.16b,v5.16b
aese	v0.16b,v21.16b
aesmc	v0.16b,v0.16b
ld1	{v17.4s},[x7]		// re-pre-load rndkey[1]
aese	v0.16b,v22.16b
aesmc	v0.16b,v0.16b
aese	v0.16b,v23.16b
eor	v6.16b,v0.16b,v7.16b
b.hs	Loop_cbc_enc

st1	{v6.16b},[x1],#16
b	Lcbc_done

.align	5
Lcbc_enc128:
ld1	{v2.4s,v3.4s},[x7]
aese	v0.16b,v16.16b
aesmc	v0.16b,v0.16b
b	Lenter_cbc_enc128
Loop_cbc_enc128:
aese	v0.16b,v16.16b
aesmc	v0.16b,v0.16b
st1	{v6.16b},[x1],#16
Lenter_cbc_enc128:
aese	v0.16b,v17.16b
aesmc	v0.16b,v0.16b
subs	x2,x2,#16
aese	v0.16b,v2.16b
aesmc	v0.16b,v0.16b
csel	x8,xzr,x8,eq
aese	v0.16b,v3.16b
aesmc	v0.16b,v0.16b
aese	v0.16b,v18.16b
aesmc	v0.16b,v0.16b
aese	v0.16b,v19.16b
aesmc	v0.16b,v0.16b
ld1	{v16.16b},[x0],x8
aese	v0.16b,v20.16b
aesmc	v0.16b,v0.16b
aese	v0.16b,v21.16b
aesmc	v0.16b,v0.16b
aese	v0.16b,v22.16b
aesmc	v0.16b,v0.16b
eor	v16.16b,v16.16b,v5.16b
aese	v0.16b,v23.16b
eor	v6.16b,v0.16b,v7.16b
b.hs	Loop_cbc_enc128

st1	{v6.16b},[x1],#16
b	Lcbc_done
.align	5
Lcbc_dec:
ld1	{v18.16b},[x0],#16
subs	x2,x2,#32		// bias
add	w6,w5,#2
orr	v3.16b,v0.16b,v0.16b
orr	v1.16b,v0.16b,v0.16b
orr	v19.16b,v18.16b,v18.16b
b.lo	Lcbc_dec_tail

orr	v1.16b,v18.16b,v18.16b
ld1	{v18.16b},[x0],#16
orr	v2.16b,v0.16b,v0.16b
orr	v3.16b,v1.16b,v1.16b
orr	v19.16b,v18.16b,v18.16b

Loop3x_cbc_dec:
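// Main decryption path: three blocks are kept in flight in v0, v1 and v18,
// the round-key pairs are streamed through v16/v17 from x7, and v6 carries
// the chaining value (IV, then the previous ciphertext block) between
// iterations.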
aesd	v0.16b,v16.16b
aesimc	v0.16b,v0.16b
aesd	v1.16b,v16.16b
aesimc	v1.16b,v1.16b
aesd	v18.16b,v16.16b
aesimc	v18.16b,v18.16b
ld1	{v16.4s},[x7],#16
subs	w6,w6,#2
aesd	v0.16b,v17.16b
aesimc	v0.16b,v0.16b
aesd	v1.16b,v17.16b
aesimc	v1.16b,v1.16b
aesd	v18.16b,v17.16b
aesimc	v18.16b,v18.16b
ld1	{v17.4s},[x7],#16
b.gt	Loop3x_cbc_dec

aesd	v0.16b,v16.16b
aesimc	v0.16b,v0.16b
aesd	v1.16b,v16.16b
aesimc	v1.16b,v1.16b
aesd	v18.16b,v16.16b
aesimc	v18.16b,v18.16b
eor	v4.16b,v6.16b,v7.16b
subs	x2,x2,#0x30
eor	v5.16b,v2.16b,v7.16b
csel	x6,x2,x6,lo			// x6 (w6) is zero at this point
aesd	v0.16b,v17.16b
aesimc	v0.16b,v0.16b
aesd	v1.16b,v17.16b
aesimc	v1.16b,v1.16b
aesd	v18.16b,v17.16b
aesimc	v18.16b,v18.16b
eor	v17.16b,v3.16b,v7.16b
add	x0,x0,x6		// x0 is adjusted so that at exit from
// the loop v1.16b-v18.16b hold the
// last input blocks ("words")
orr	v6.16b,v19.16b,v19.16b
mov	x7,x3
aesd	v0.16b,v20.16b
aesimc	v0.16b,v0.16b
aesd	v1.16b,v20.16b
aesimc	v1.16b,v1.16b
aesd	v18.16b,v20.16b
aesimc	v18.16b,v18.16b
ld1	{v2.16b},[x0],#16
aesd	v0.16b,v21.16b
aesimc	v0.16b,v0.16b
aesd	v1.16b,v21.16b
aesimc	v1.16b,v1.16b
aesd	v18.16b,v21.16b
aesimc	v18.16b,v18.16b
ld1	{v3.16b},[x0],#16
aesd	v0.16b,v22.16b
aesimc	v0.16b,v0.16b
aesd	v1.16b,v22.16b
aesimc	v1.16b,v1.16b
aesd	v18.16b,v22.16b
aesimc	v18.16b,v18.16b
ld1	{v19.16b},[x0],#16
aesd	v0.16b,v23.16b
aesd	v1.16b,v23.16b
aesd	v18.16b,v23.16b
ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
add	w6,w5,#2
eor	v4.16b,v4.16b,v0.16b
eor	v5.16b,v5.16b,v1.16b
eor	v18.16b,v18.16b,v17.16b
ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
st1	{v4.16b},[x1],#16
orr	v0.16b,v2.16b,v2.16b
st1	{v5.16b},[x1],#16
orr	v1.16b,v3.16b,v3.16b
st1	{v18.16b},[x1],#16
orr	v18.16b,v19.16b,v19.16b
b.hs	Loop3x_cbc_dec

cmn	x2,#0x30
b.eq	Lcbc_done
nop

Lcbc_dec_tail:
aesd	v1.16b,v16.16b
aesimc	v1.16b,v1.16b
aesd	v18.16b,v16.16b
aesimc	v18.16b,v18.16b
ld1	{v16.4s},[x7],#16
subs	w6,w6,#2
aesd	v1.16b,v17.16b
aesimc	v1.16b,v1.16b
aesd	v18.16b,v17.16b
aesimc	v18.16b,v18.16b
ld1	{v17.4s},[x7],#16
b.gt	Lcbc_dec_tail

aesd	v1.16b,v16.16b
aesimc	v1.16b,v1.16b
aesd	v18.16b,v16.16b
aesimc	v18.16b,v18.16b
aesd	v1.16b,v17.16b
aesimc	v1.16b,v1.16b
aesd	v18.16b,v17.16b
aesimc	v18.16b,v18.16b
aesd	v1.16b,v20.16b
aesimc	v1.16b,v1.16b
aesd	v18.16b,v20.16b
aesimc	v18.16b,v18.16b
cmn	x2,#0x20
aesd	v1.16b,v21.16b
aesimc	v1.16b,v1.16b
aesd	v18.16b,v21.16b
aesimc	v18.16b,v18.16b
eor	v5.16b,v6.16b,v7.16b
aesd	v1.16b,v22.16b
aesimc	v1.16b,v1.16b
aesd	v18.16b,v22.16b
aesimc	v18.16b,v18.16b
eor	v17.16b,v3.16b,v7.16b
aesd	v1.16b,v23.16b
aesd	v18.16b,v23.16b
b.eq	Lcbc_dec_one
eor	v5.16b,v5.16b,v1.16b
eor	v17.16b,v17.16b,v18.16b
orr	v6.16b,v19.16b,v19.16b
st1	{v5.16b},[x1],#16
st1	{v17.16b},[x1],#16
b	Lcbc_done

Lcbc_dec_one:
eor	v5.16b,v5.16b,v18.16b
orr	v6.16b,v19.16b,v19.16b
st1	{v5.16b},[x1],#16

Lcbc_done:
st1	{v6.16b},[x4]
Lcbc_abort:
ldr	x29,[sp],#16
ret

.globl	_aes_hw_ctr32_encrypt_blocks
.private_extern	_aes_hw_ctr32_encrypt_blocks

.align	5
_aes_hw_ctr32_encrypt_blocks:
// Armv8.3-A PAuth: even though x30 is pushed to the stack, it is not popped later.
AARCH64_VALID_CALL_TARGET
stp	x29,x30,[sp,#-16]!
add	x29,sp,#0
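// As used below: x0 = input, x1 = output, x2 = number of 16-byte blocks,
// x3 = key schedule, x4 = 16-byte IV whose last word is the big-endian
// counter. The main loop keeps three counter blocks in flight; one or two
// remaining blocks are handled by Lctr32_tail.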
ldr	w5,[x3,#240]

ldr	w8, [x4, #12]
ld1	{v0.4s},[x4]

ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
sub	w5,w5,#4
mov	x12,#16
cmp	x2,#2
add	x7,x3,x5,lsl#4	// pointer to last 5 round keys
sub	w5,w5,#2
ld1	{v20.4s,v21.4s},[x7],#32
ld1	{v22.4s,v23.4s},[x7],#32
ld1	{v7.4s},[x7]
add	x7,x3,#32
mov	w6,w5
csel	x12,xzr,x12,lo

// ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
// affected by silicon errata #1742098 [0] and #1655431 [1],
// respectively, where the second instruction of an aese/aesmc
// instruction pair may execute twice if an interrupt is taken right
// after the first instruction consumes an input register whose most
// recent write updated only a single 32-bit lane.
//
// This function uses a counter in one 32-bit lane. The single-lane mov
// instructions could write to v1.16b and v18.16b directly, but that
// would trip these errata. We write to v6.16b and copy to the final
// register as a workaround.
//
// [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
// [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
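// For illustration only (not executed), the pattern the errata are about is
// a single-lane write feeding an aese/aesmc pair, e.g.
//	mov	v1.s[3],w9
//	aese	v1.16b,v16.16b
//	aesmc	v1.16b,v1.16b	// second of the pair may re-execute
// whereas the code below stages the lane write in v6 and then copies the
// whole register, e.g. "mov v6.s[3],w10" followed by "orr v1.16b,v6.16b,v6.16b".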
#ifndef __AARCH64EB__
rev	w8, w8
#endif
add	w10, w8, #1
orr	v6.16b,v0.16b,v0.16b
rev	w10, w10
mov	v6.s[3],w10
add	w8, w8, #2
orr	v1.16b,v6.16b,v6.16b
b.ls	Lctr32_tail
rev	w12, w8
mov	v6.s[3],w12
sub	x2,x2,#3		// bias
orr	v18.16b,v6.16b,v6.16b
b	Loop3x_ctr32

.align	4
Loop3x_ctr32:
aese	v0.16b,v16.16b
aesmc	v0.16b,v0.16b
aese	v1.16b,v16.16b
aesmc	v1.16b,v1.16b
aese	v18.16b,v16.16b
aesmc	v18.16b,v18.16b
ld1	{v16.4s},[x7],#16
subs	w6,w6,#2
aese	v0.16b,v17.16b
aesmc	v0.16b,v0.16b
aese	v1.16b,v17.16b
aesmc	v1.16b,v1.16b
aese	v18.16b,v17.16b
aesmc	v18.16b,v18.16b
ld1	{v17.4s},[x7],#16
b.gt	Loop3x_ctr32

aese	v0.16b,v16.16b
aesmc	v4.16b,v0.16b
aese	v1.16b,v16.16b
aesmc	v5.16b,v1.16b
ld1	{v2.16b},[x0],#16
add	w9,w8,#1
aese	v18.16b,v16.16b
aesmc	v18.16b,v18.16b
ld1	{v3.16b},[x0],#16
rev	w9,w9
aese	v4.16b,v17.16b
aesmc	v4.16b,v4.16b
aese	v5.16b,v17.16b
aesmc	v5.16b,v5.16b
ld1	{v19.16b},[x0],#16
mov	x7,x3
aese	v18.16b,v17.16b
aesmc	v17.16b,v18.16b
aese	v4.16b,v20.16b
aesmc	v4.16b,v4.16b
aese	v5.16b,v20.16b
aesmc	v5.16b,v5.16b
eor	v2.16b,v2.16b,v7.16b
add	w10,w8,#2
aese	v17.16b,v20.16b
aesmc	v17.16b,v17.16b
eor	v3.16b,v3.16b,v7.16b
add	w8,w8,#3
aese	v4.16b,v21.16b
aesmc	v4.16b,v4.16b
aese	v5.16b,v21.16b
aesmc	v5.16b,v5.16b
// Note the logic to update v0.16b, v1.16b, and v18.16b is written to work
// around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
// 32-bit mode. See the comment above.
eor	v19.16b,v19.16b,v7.16b
mov	v6.s[3], w9
aese	v17.16b,v21.16b
aesmc	v17.16b,v17.16b
orr	v0.16b,v6.16b,v6.16b
rev	w10,w10
aese	v4.16b,v22.16b
aesmc	v4.16b,v4.16b
mov	v6.s[3], w10
rev	w12,w8
aese	v5.16b,v22.16b
aesmc	v5.16b,v5.16b
orr	v1.16b,v6.16b,v6.16b
mov	v6.s[3], w12
aese	v17.16b,v22.16b
aesmc	v17.16b,v17.16b
orr	v18.16b,v6.16b,v6.16b
subs	x2,x2,#3
aese	v4.16b,v23.16b
aese	v5.16b,v23.16b
aese	v17.16b,v23.16b

eor	v2.16b,v2.16b,v4.16b
ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
st1	{v2.16b},[x1],#16
eor	v3.16b,v3.16b,v5.16b
mov	w6,w5
st1	{v3.16b},[x1],#16
eor	v19.16b,v19.16b,v17.16b
ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
st1	{v19.16b},[x1],#16
b.hs	Loop3x_ctr32

adds	x2,x2,#3
b.eq	Lctr32_done
cmp	x2,#1
mov	x12,#16
csel	x12,xzr,x12,eq

Lctr32_tail:
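// One or two blocks remain. x12 is 16 when two blocks are left and 0 when
// only one is, so the first input load below advances x0 only if a second
// block will actually be consumed, and the second output store is skipped
// for a single block.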
aese	v0.16b,v16.16b
aesmc	v0.16b,v0.16b
aese	v1.16b,v16.16b
aesmc	v1.16b,v1.16b
ld1	{v16.4s},[x7],#16
subs	w6,w6,#2
aese	v0.16b,v17.16b
aesmc	v0.16b,v0.16b
aese	v1.16b,v17.16b
aesmc	v1.16b,v1.16b
ld1	{v17.4s},[x7],#16
b.gt	Lctr32_tail

aese	v0.16b,v16.16b
aesmc	v0.16b,v0.16b
aese	v1.16b,v16.16b
aesmc	v1.16b,v1.16b
aese	v0.16b,v17.16b
aesmc	v0.16b,v0.16b
aese	v1.16b,v17.16b
aesmc	v1.16b,v1.16b
ld1	{v2.16b},[x0],x12
aese	v0.16b,v20.16b
aesmc	v0.16b,v0.16b
aese	v1.16b,v20.16b
aesmc	v1.16b,v1.16b
ld1	{v3.16b},[x0]
aese	v0.16b,v21.16b
aesmc	v0.16b,v0.16b
aese	v1.16b,v21.16b
aesmc	v1.16b,v1.16b
eor	v2.16b,v2.16b,v7.16b
aese	v0.16b,v22.16b
aesmc	v0.16b,v0.16b
aese	v1.16b,v22.16b
aesmc	v1.16b,v1.16b
eor	v3.16b,v3.16b,v7.16b
aese	v0.16b,v23.16b
aese	v1.16b,v23.16b

cmp	x2,#1
eor	v2.16b,v2.16b,v0.16b
eor	v3.16b,v3.16b,v1.16b
st1	{v2.16b},[x1],#16
b.eq	Lctr32_done
st1	{v3.16b},[x1]

Lctr32_done:
ldr	x29,[sp],#16
ret

#endif
#endif  // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__APPLE__)