|  | // This file is generated from a similarly-named Perl script in the BoringSSL | 
|  | // source tree. Do not edit by hand. | 
|  |  | 
|  | #include <openssl/asm_base.h> | 
|  |  | 
|  | #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(_WIN32) | 
|  | #include <openssl/arm_arch.h> | 
|  | #if __ARM_MAX_ARCH__ >= 8 | 
|  |  | 
|  | .arch	armv8-a+crypto | 
|  | .text | 
|  | .globl	aes_gcm_enc_kernel | 
|  |  | 
|  | .def aes_gcm_enc_kernel | 
|  | .type 32 | 
|  | .endef | 
|  | .align	4 | 
|  | aes_gcm_enc_kernel: | 
|  | AARCH64_SIGN_LINK_REGISTER | 
|  | stp	x29, x30, [sp, #-128]! | 
|  | mov	x29, sp | 
|  | stp	x19, x20, [sp, #16] | 
|  | mov	x16, x4 | 
|  | mov	x8, x5 | 
|  | stp	x21, x22, [sp, #32] | 
|  | stp	x23, x24, [sp, #48] | 
|  | stp	d8, d9, [sp, #64] | 
|  | stp	d10, d11, [sp, #80] | 
|  | stp	d12, d13, [sp, #96] | 
|  | stp	d14, d15, [sp, #112] | 
|  | ldr	w17, [x8, #240] | 
|  | add	x19, x8, x17, lsl #4                   // borrow input_l1 for last key | 
|  | ldp	x13, x14, [x19]                       // load round N keys | 
|  | ldr	q31, [x19, #-16]                        // load round N-1 keys | 
|  | add	x4, x0, x1, lsr #3   // end_input_ptr | 
|  | lsr	x5, x1, #3              // byte_len | 
|  | mov	x15, x5 | 
|  | ldp	x10, x11, [x16]              // ctr96_b64, ctr96_t32 | 
|  | ld1	{ v0.16b}, [x16]                             // special case vector load initial counter so we can start first AES block as quickly as possible | 
|  | sub	x5, x5, #1      // byte_len - 1 | 
|  | ldr	q18, [x8, #0]                                  // load rk0 | 
|  | and	x5, x5, #0xffffffffffffffc0 // number of bytes to be processed in main loop (at least 1 byte must be handled by tail) | 
|  | ldr	q25, [x8, #112]                                // load rk7 | 
|  | add	x5, x5, x0 | 
|  | lsr	x12, x11, #32 | 
|  | fmov	d2, x10                               // CTR block 2 | 
|  | orr	w11, w11, w11 | 
|  | rev	w12, w12                                // rev_ctr32 | 
|  | fmov	d1, x10                               // CTR block 1 | 
|  | aese	v0.16b, v18.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 0 - round 0 | 
|  | add	w12, w12, #1                            // increment rev_ctr32 | 
|  | rev	w9, w12                                 // CTR block 1 | 
|  | fmov	d3, x10                               // CTR block 3 | 
|  | orr	x9, x11, x9, lsl #32            // CTR block 1 | 
|  | add	w12, w12, #1                            // CTR block 1 | 
|  | ldr	q19, [x8, #16]                                 // load rk1 | 
|  | fmov	v1.d[1], x9                               // CTR block 1 | 
|  | rev	w9, w12                                 // CTR block 2 | 
|  | add	w12, w12, #1                            // CTR block 2 | 
|  | orr	x9, x11, x9, lsl #32            // CTR block 2 | 
|  | ldr	q20, [x8, #32]                                 // load rk2 | 
|  | fmov	v2.d[1], x9                               // CTR block 2 | 
|  | rev	w9, w12                                 // CTR block 3 | 
|  | aese	v0.16b, v19.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 0 - round 1 | 
|  | orr	x9, x11, x9, lsl #32            // CTR block 3 | 
|  | fmov	v3.d[1], x9                               // CTR block 3 | 
|  | aese	v1.16b, v18.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 1 - round 0 | 
|  | ldr	q21, [x8, #48]                                 // load rk3 | 
|  | aese	v0.16b, v20.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 0 - round 2 | 
|  | ldr	q24, [x8, #96]                                 // load rk6 | 
|  | aese	v2.16b, v18.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 2 - round 0 | 
|  | ldr	q23, [x8, #80]                                 // load rk5 | 
|  | aese	v1.16b, v19.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 1 - round 1 | 
|  | ldr	q14, [x6, #48]                              // load h3l | h3h | 
|  | ext	v14.16b, v14.16b, v14.16b, #8 | 
|  | aese	v3.16b, v18.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 3 - round 0 | 
|  | aese	v2.16b, v19.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 2 - round 1 | 
|  | ldr	q22, [x8, #64]                                 // load rk4 | 
|  | aese	v1.16b, v20.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 1 - round 2 | 
|  | ldr	q13, [x6, #32]                              // load h2l | h2h | 
|  | ext	v13.16b, v13.16b, v13.16b, #8 | 
|  | aese	v3.16b, v19.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 3 - round 1 | 
|  | ldr	q30, [x8, #192]                               // load rk12 | 
|  | aese	v2.16b, v20.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 2 - round 2 | 
|  | ldr	q15, [x6, #80]                              // load h4l | h4h | 
|  | ext	v15.16b, v15.16b, v15.16b, #8 | 
|  | aese	v1.16b, v21.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 1 - round 3 | 
|  | ldr	q29, [x8, #176]                               // load rk11 | 
|  | aese	v3.16b, v20.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 3 - round 2 | 
|  | ldr	q26, [x8, #128]                                // load rk8 | 
|  | aese	v2.16b, v21.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 2 - round 3 | 
|  | add	w12, w12, #1                            // CTR block 3 | 
|  | aese	v0.16b, v21.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 0 - round 3 | 
|  | aese	v3.16b, v21.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 3 - round 3 | 
|  | ld1	{ v11.16b}, [x3] | 
|  | ext	v11.16b, v11.16b, v11.16b, #8 | 
|  | rev64	v11.16b, v11.16b | 
|  | aese	v2.16b, v22.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 2 - round 4 | 
|  | aese	v0.16b, v22.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 0 - round 4 | 
|  | aese	v1.16b, v22.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 1 - round 4 | 
|  | aese	v3.16b, v22.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 3 - round 4 | 
|  | cmp	x17, #12                                      // setup flags for AES-128/192/256 check | 
|  | aese	v0.16b, v23.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 0 - round 5 | 
|  | aese	v1.16b, v23.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 1 - round 5 | 
|  | aese	v3.16b, v23.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 3 - round 5 | 
|  | aese	v2.16b, v23.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 2 - round 5 | 
|  | aese	v1.16b, v24.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 1 - round 6 | 
|  | trn2	v17.2d,  v14.2d,    v15.2d                      // h4l | h3l | 
|  | aese	v3.16b, v24.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 3 - round 6 | 
|  | ldr	q27, [x8, #144]                                // load rk9 | 
|  | aese	v0.16b, v24.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 0 - round 6 | 
|  | ldr	q12, [x6]                                   // load h1l | h1h | 
|  | ext	v12.16b, v12.16b, v12.16b, #8 | 
|  | aese	v2.16b, v24.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 2 - round 6 | 
|  | ldr	q28, [x8, #160]                               // load rk10 | 
|  | aese	v1.16b, v25.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 1 - round 7 | 
|  | trn1	v9.2d, v14.2d,    v15.2d                      // h4h | h3h | 
|  | aese	v0.16b, v25.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 0 - round 7 | 
|  | aese	v2.16b, v25.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 2 - round 7 | 
|  | aese	v3.16b, v25.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 3 - round 7 | 
|  | trn2	v16.2d,  v12.2d,    v13.2d                      // h2l | h1l | 
|  | aese	v1.16b, v26.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 1 - round 8 | 
|  | aese	v2.16b, v26.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 2 - round 8 | 
|  | aese	v3.16b, v26.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 3 - round 8 | 
|  | aese	v0.16b, v26.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 0 - round 8 | 
|  | b.lt	Lenc_finish_first_blocks                         // branch if AES-128 | 
|  |  | 
|  | aese	v1.16b, v27.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 1 - round 9 | 
|  | aese	v2.16b, v27.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 2 - round 9 | 
|  | aese	v3.16b, v27.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 3 - round 9 | 
|  | aese	v0.16b, v27.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 0 - round 9 | 
|  | aese	v1.16b, v28.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 1 - round 10 | 
|  | aese	v2.16b, v28.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 2 - round 10 | 
|  | aese	v3.16b, v28.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 3 - round 10 | 
|  | aese	v0.16b, v28.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 0 - round 10 | 
|  | b.eq	Lenc_finish_first_blocks                         // branch if AES-192 | 
|  |  | 
|  | aese	v1.16b, v29.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 1 - round 11 | 
|  | aese	v2.16b, v29.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 2 - round 11 | 
|  | aese	v0.16b, v29.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 0 - round 11 | 
|  | aese	v3.16b, v29.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 3 - round 11 | 
|  | aese	v1.16b, v30.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 1 - round 12 | 
|  | aese	v2.16b, v30.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 2 - round 12 | 
|  | aese	v0.16b, v30.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 0 - round 12 | 
|  | aese	v3.16b, v30.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 3 - round 12 | 
|  |  | 
|  | Lenc_finish_first_blocks: | 
|  | cmp	x0, x5                   // check if we have <= 4 blocks | 
|  | eor	v17.16b, v17.16b, v9.16b                  // h4k | h3k | 
|  | aese	v2.16b, v31.16b                                    // AES block 2 - round N-1 | 
|  | trn1	v8.2d,    v12.2d,    v13.2d                      // h2h | h1h | 
|  | aese	v1.16b, v31.16b                                    // AES block 1 - round N-1 | 
|  | aese	v0.16b, v31.16b                                    // AES block 0 - round N-1 | 
|  | aese	v3.16b, v31.16b                                    // AES block 3 - round N-1 | 
|  | eor	v16.16b, v16.16b, v8.16b                     // h2k | h1k | 
|  | b.ge	Lenc_tail                                        // handle tail | 
|  |  | 
|  | ldp	x19, x20, [x0, #16]           // AES block 1 - load plaintext | 
|  | rev	w9, w12                                 // CTR block 4 | 
|  | ldp	x6, x7, [x0, #0]            // AES block 0 - load plaintext | 
|  | ldp	x23, x24, [x0, #48]           // AES block 3 - load plaintext | 
|  | ldp	x21, x22, [x0, #32]           // AES block 2 - load plaintext | 
|  | add	x0, x0, #64                       // AES input_ptr update | 
|  | eor	x19, x19, x13                      // AES block 1 - round N low | 
|  | eor	x20, x20, x14                      // AES block 1 - round N high | 
|  | fmov	d5, x19                               // AES block 1 - mov low | 
|  | eor	x6, x6, x13                      // AES block 0 - round N low | 
|  | eor	x7, x7, x14                      // AES block 0 - round N high | 
|  | eor	x24, x24, x14                      // AES block 3 - round N high | 
|  | fmov	d4, x6                               // AES block 0 - mov low | 
|  | cmp	x0, x5                   // check if we have <= 8 blocks | 
|  | fmov	v4.d[1], x7                           // AES block 0 - mov high | 
|  | eor	x23, x23, x13                      // AES block 3 - round N low | 
|  | eor	x21, x21, x13                      // AES block 2 - round N low | 
|  | fmov	v5.d[1], x20                           // AES block 1 - mov high | 
|  | fmov	d6, x21                               // AES block 2 - mov low | 
|  | add	w12, w12, #1                            // CTR block 4 | 
|  | orr	x9, x11, x9, lsl #32            // CTR block 4 | 
|  | fmov	d7, x23                               // AES block 3 - mov low | 
|  | eor	x22, x22, x14                      // AES block 2 - round N high | 
|  | fmov	v6.d[1], x22                           // AES block 2 - mov high | 
|  | eor	v4.16b, v4.16b, v0.16b                          // AES block 0 - result | 
|  | fmov	d0, x10                               // CTR block 4 | 
|  | fmov	v0.d[1], x9                               // CTR block 4 | 
|  | rev	w9, w12                                 // CTR block 5 | 
|  | add	w12, w12, #1                            // CTR block 5 | 
|  | eor	v5.16b, v5.16b, v1.16b                          // AES block 1 - result | 
|  | fmov	d1, x10                               // CTR block 5 | 
|  | orr	x9, x11, x9, lsl #32            // CTR block 5 | 
|  | fmov	v1.d[1], x9                               // CTR block 5 | 
|  | rev	w9, w12                                 // CTR block 6 | 
|  | st1	{ v4.16b}, [x2], #16                     // AES block 0 - store result | 
|  | fmov	v7.d[1], x24                           // AES block 3 - mov high | 
|  | orr	x9, x11, x9, lsl #32            // CTR block 6 | 
|  | eor	v6.16b, v6.16b, v2.16b                          // AES block 2 - result | 
|  | st1	{ v5.16b}, [x2], #16                     // AES block 1 - store result | 
|  | add	w12, w12, #1                            // CTR block 6 | 
|  | fmov	d2, x10                               // CTR block 6 | 
|  | fmov	v2.d[1], x9                               // CTR block 6 | 
|  | st1	{ v6.16b}, [x2], #16                     // AES block 2 - store result | 
|  | rev	w9, w12                                 // CTR block 7 | 
|  | orr	x9, x11, x9, lsl #32            // CTR block 7 | 
|  | eor	v7.16b, v7.16b, v3.16b                          // AES block 3 - result | 
|  | st1	{ v7.16b}, [x2], #16                     // AES block 3 - store result | 
|  | b.ge	Lenc_prepretail                                  // do prepretail | 
|  |  | 
|  | Lenc_main_loop:	//	main loop start | 
|  | aese	v0.16b, v18.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 0 | 
|  | rev64	v4.16b, v4.16b                                    // GHASH block 4k (only t0 is free) | 
|  | aese	v1.16b, v18.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 0 | 
|  | fmov	d3, x10                               // CTR block 4k+3 | 
|  | aese	v2.16b, v18.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 0 | 
|  | ext	v11.16b, v11.16b, v11.16b, #8                     // PRE 0 | 
|  | aese	v0.16b, v19.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 1 | 
|  | fmov	v3.d[1], x9                               // CTR block 4k+3 | 
|  | aese	v1.16b, v19.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 1 | 
|  | ldp	x23, x24, [x0, #48]           // AES block 4k+7 - load plaintext | 
|  | aese	v2.16b, v19.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 1 | 
|  | ldp	x21, x22, [x0, #32]           // AES block 4k+6 - load plaintext | 
|  | aese	v0.16b, v20.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 2 | 
|  | eor	v4.16b, v4.16b, v11.16b                           // PRE 1 | 
|  | aese	v1.16b, v20.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 2 | 
|  | aese	v3.16b, v18.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 0 | 
|  | eor	x23, x23, x13                      // AES block 4k+7 - round N low | 
|  | aese	v0.16b, v21.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 3 | 
|  | mov	d10, v17.d[1]                               // GHASH block 4k - mid | 
|  | pmull2	v9.1q, v4.2d, v15.2d                       // GHASH block 4k - high | 
|  | eor	x22, x22, x14                      // AES block 4k+6 - round N high | 
|  | mov	d8, v4.d[1]                                  // GHASH block 4k - mid | 
|  | aese	v3.16b, v19.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 1 | 
|  | rev64	v5.16b, v5.16b                                    // GHASH block 4k+1 (t0 and t1 free) | 
|  | aese	v0.16b, v22.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 4 | 
|  | pmull	v11.1q, v4.1d, v15.1d                       // GHASH block 4k - low | 
|  | eor	v8.8b, v8.8b, v4.8b                          // GHASH block 4k - mid | 
|  | aese	v2.16b, v20.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 2 | 
|  | aese	v0.16b, v23.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 5 | 
|  | rev64	v7.16b, v7.16b                                    // GHASH block 4k+3 (t0, t1, t2 and t3 free) | 
|  | pmull2	v4.1q, v5.2d, v14.2d                          // GHASH block 4k+1 - high | 
|  | pmull	v10.1q, v8.1d, v10.1d                      // GHASH block 4k - mid | 
|  | rev64	v6.16b, v6.16b                                    // GHASH block 4k+2 (t0, t1, and t2 free) | 
|  | pmull	v8.1q, v5.1d, v14.1d                          // GHASH block 4k+1 - low | 
|  | eor	v9.16b, v9.16b, v4.16b                         // GHASH block 4k+1 - high | 
|  | mov	d4, v5.d[1]                                  // GHASH block 4k+1 - mid | 
|  | aese	v1.16b, v21.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 3 | 
|  | aese	v3.16b, v20.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 2 | 
|  | eor	v11.16b, v11.16b, v8.16b                         // GHASH block 4k+1 - low | 
|  | aese	v2.16b, v21.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 3 | 
|  | aese	v1.16b, v22.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 4 | 
|  | mov	d8, v6.d[1]                                  // GHASH block 4k+2 - mid | 
|  | aese	v3.16b, v21.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 3 | 
|  | eor	v4.8b, v4.8b, v5.8b                          // GHASH block 4k+1 - mid | 
|  | aese	v2.16b, v22.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 4 | 
|  | aese	v0.16b, v24.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 6 | 
|  | eor	v8.8b, v8.8b, v6.8b                          // GHASH block 4k+2 - mid | 
|  | aese	v3.16b, v22.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 4 | 
|  | pmull	v4.1q, v4.1d, v17.1d                          // GHASH block 4k+1 - mid | 
|  | aese	v0.16b, v25.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 7 | 
|  | aese	v3.16b, v23.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 5 | 
|  | ins	v8.d[1], v8.d[0]                                // GHASH block 4k+2 - mid | 
|  | aese	v1.16b, v23.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 5 | 
|  | aese	v0.16b, v26.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 8 | 
|  | aese	v2.16b, v23.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 5 | 
|  | aese	v1.16b, v24.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 6 | 
|  | eor	v10.16b, v10.16b, v4.16b                         // GHASH block 4k+1 - mid | 
|  | pmull2	v4.1q, v6.2d, v13.2d                          // GHASH block 4k+2 - high | 
|  | pmull	v5.1q, v6.1d, v13.1d                          // GHASH block 4k+2 - low | 
|  | aese	v1.16b, v25.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 7 | 
|  | pmull	v6.1q, v7.1d, v12.1d                          // GHASH block 4k+3 - low | 
|  | eor	v9.16b, v9.16b, v4.16b                         // GHASH block 4k+2 - high | 
|  | aese	v3.16b, v24.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 6 | 
|  | ldp	x19, x20, [x0, #16]           // AES block 4k+5 - load plaintext | 
|  | aese	v1.16b, v26.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 8 | 
|  | mov	d4, v7.d[1]                                  // GHASH block 4k+3 - mid | 
|  | aese	v2.16b, v24.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 6 | 
|  | eor	v11.16b, v11.16b, v5.16b                         // GHASH block 4k+2 - low | 
|  | pmull2	v8.1q, v8.2d, v16.2d                          // GHASH block 4k+2 - mid | 
|  | pmull2	v5.1q, v7.2d, v12.2d                          // GHASH block 4k+3 - high | 
|  | eor	v4.8b, v4.8b, v7.8b                          // GHASH block 4k+3 - mid | 
|  | aese	v2.16b, v25.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 7 | 
|  | eor	x19, x19, x13                      // AES block 4k+5 - round N low | 
|  | aese	v2.16b, v26.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 8 | 
|  | eor	v10.16b, v10.16b, v8.16b                         // GHASH block 4k+2 - mid | 
|  | aese	v3.16b, v25.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 7 | 
|  | eor	x21, x21, x13                      // AES block 4k+6 - round N low | 
|  | aese	v3.16b, v26.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 8 | 
|  | movi	v8.8b, #0xc2 | 
|  | pmull	v4.1q, v4.1d, v16.1d                          // GHASH block 4k+3 - mid | 
|  | eor	v9.16b, v9.16b, v5.16b                         // GHASH block 4k+3 - high | 
|  | cmp	x17, #12                                      // setup flags for AES-128/192/256 check | 
|  | fmov	d5, x19                               // AES block 4k+5 - mov low | 
|  | ldp	x6, x7, [x0, #0]            // AES block 4k+4 - load plaintext | 
|  | b.lt	Lenc_main_loop_continue                          // branch if AES-128 | 
|  |  | 
|  | aese	v1.16b, v27.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 9 | 
|  | aese	v0.16b, v27.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 9 | 
|  | aese	v2.16b, v27.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 9 | 
|  | aese	v3.16b, v27.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 9 | 
|  | aese	v0.16b, v28.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 10 | 
|  | aese	v1.16b, v28.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 10 | 
|  | aese	v2.16b, v28.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 10 | 
|  | aese	v3.16b, v28.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 10 | 
|  | b.eq	Lenc_main_loop_continue                          // branch if AES-192 | 
|  |  | 
|  | aese	v0.16b, v29.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 11 | 
|  | aese	v1.16b, v29.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 11 | 
|  | aese	v2.16b, v29.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 11 | 
|  | aese	v3.16b, v29.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 11 | 
|  | aese	v1.16b, v30.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 12 | 
|  | aese	v0.16b, v30.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 12 | 
|  | aese	v2.16b, v30.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 12 | 
|  | aese	v3.16b, v30.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 12 | 
|  |  | 
|  | Lenc_main_loop_continue: | 
|  | shl	d8, d8, #56               // mod_constant | 
|  | eor	v11.16b, v11.16b, v6.16b                         // GHASH block 4k+3 - low | 
|  | eor	v10.16b, v10.16b, v4.16b                         // GHASH block 4k+3 - mid | 
|  | add	w12, w12, #1                            // CTR block 4k+3 | 
|  | eor	v4.16b, v11.16b, v9.16b                         // MODULO - karatsuba tidy up | 
|  | add	x0, x0, #64                       // AES input_ptr update | 
|  | pmull	v7.1q, v9.1d, v8.1d            // MODULO - top 64b align with mid | 
|  | rev	w9, w12                                 // CTR block 4k+8 | 
|  | ext	v9.16b, v9.16b, v9.16b, #8                     // MODULO - other top alignment | 
|  | eor	x6, x6, x13                      // AES block 4k+4 - round N low | 
|  | eor	v10.16b, v10.16b, v4.16b                         // MODULO - karatsuba tidy up | 
|  | eor	x7, x7, x14                      // AES block 4k+4 - round N high | 
|  | fmov	d4, x6                               // AES block 4k+4 - mov low | 
|  | orr	x9, x11, x9, lsl #32            // CTR block 4k+8 | 
|  | eor	v7.16b, v9.16b, v7.16b                   // MODULO - fold into mid | 
|  | eor	x20, x20, x14                      // AES block 4k+5 - round N high | 
|  | eor	x24, x24, x14                      // AES block 4k+7 - round N high | 
|  | add	w12, w12, #1                            // CTR block 4k+8 | 
|  | aese	v0.16b, v31.16b                                    // AES block 4k+4 - round N-1 | 
|  | fmov	v4.d[1], x7                           // AES block 4k+4 - mov high | 
|  | eor	v10.16b, v10.16b, v7.16b                      // MODULO - fold into mid | 
|  | fmov	d7, x23                               // AES block 4k+7 - mov low | 
|  | aese	v1.16b, v31.16b                                    // AES block 4k+5 - round N-1 | 
|  | fmov	v5.d[1], x20                           // AES block 4k+5 - mov high | 
|  | fmov	d6, x21                               // AES block 4k+6 - mov low | 
|  | cmp	x0, x5                   // LOOP CONTROL | 
|  | fmov	v6.d[1], x22                           // AES block 4k+6 - mov high | 
|  | pmull	v9.1q, v10.1d, v8.1d            // MODULO - mid 64b align with low | 
|  | eor	v4.16b, v4.16b, v0.16b                          // AES block 4k+4 - result | 
|  | fmov	d0, x10                               // CTR block 4k+8 | 
|  | fmov	v0.d[1], x9                               // CTR block 4k+8 | 
|  | rev	w9, w12                                 // CTR block 4k+9 | 
|  | add	w12, w12, #1                            // CTR block 4k+9 | 
|  | eor	v5.16b, v5.16b, v1.16b                          // AES block 4k+5 - result | 
|  | fmov	d1, x10                               // CTR block 4k+9 | 
|  | orr	x9, x11, x9, lsl #32            // CTR block 4k+9 | 
|  | fmov	v1.d[1], x9                               // CTR block 4k+9 | 
|  | aese	v2.16b, v31.16b                                    // AES block 4k+6 - round N-1 | 
|  | rev	w9, w12                                 // CTR block 4k+10 | 
|  | st1	{ v4.16b}, [x2], #16                     // AES block 4k+4 - store result | 
|  | orr	x9, x11, x9, lsl #32            // CTR block 4k+10 | 
|  | eor	v11.16b, v11.16b, v9.16b                         // MODULO - fold into low | 
|  | fmov	v7.d[1], x24                           // AES block 4k+7 - mov high | 
|  | ext	v10.16b, v10.16b, v10.16b, #8                     // MODULO - other mid alignment | 
|  | st1	{ v5.16b}, [x2], #16                     // AES block 4k+5 - store result | 
|  | add	w12, w12, #1                            // CTR block 4k+10 | 
|  | aese	v3.16b, v31.16b                                    // AES block 4k+7 - round N-1 | 
|  | eor	v6.16b, v6.16b, v2.16b                          // AES block 4k+6 - result | 
|  | fmov	d2, x10                               // CTR block 4k+10 | 
|  | st1	{ v6.16b}, [x2], #16                     // AES block 4k+6 - store result | 
|  | fmov	v2.d[1], x9                               // CTR block 4k+10 | 
|  | rev	w9, w12                                 // CTR block 4k+11 | 
|  | eor	v11.16b, v11.16b, v10.16b                         // MODULO - fold into low | 
|  | orr	x9, x11, x9, lsl #32            // CTR block 4k+11 | 
|  | eor	v7.16b, v7.16b, v3.16b                          // AES block 4k+7 - result | 
|  | st1	{ v7.16b}, [x2], #16                     // AES block 4k+7 - store result | 
|  | b.lt	Lenc_main_loop | 
|  |  | 
|  | Lenc_prepretail:	//	PREPRETAIL | 
|  | aese	v1.16b, v18.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 0 | 
|  | rev64	v6.16b, v6.16b                                    // GHASH block 4k+2 (t0, t1, and t2 free) | 
|  | aese	v2.16b, v18.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 0 | 
|  | fmov	d3, x10                               // CTR block 4k+3 | 
|  | aese	v0.16b, v18.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 0 | 
|  | rev64	v4.16b, v4.16b                                    // GHASH block 4k (only t0 is free) | 
|  | fmov	v3.d[1], x9                               // CTR block 4k+3 | 
|  | ext	v11.16b, v11.16b, v11.16b, #8                     // PRE 0 | 
|  | aese	v2.16b, v19.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 1 | 
|  | aese	v0.16b, v19.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 1 | 
|  | eor	v4.16b, v4.16b, v11.16b                           // PRE 1 | 
|  | rev64	v5.16b, v5.16b                                    // GHASH block 4k+1 (t0 and t1 free) | 
|  | aese	v2.16b, v20.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 2 | 
|  | aese	v3.16b, v18.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 0 | 
|  | mov	d10, v17.d[1]                               // GHASH block 4k - mid | 
|  | aese	v1.16b, v19.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 1 | 
|  | pmull	v11.1q, v4.1d, v15.1d                       // GHASH block 4k - low | 
|  | mov	d8, v4.d[1]                                  // GHASH block 4k - mid | 
|  | pmull2	v9.1q, v4.2d, v15.2d                       // GHASH block 4k - high | 
|  | aese	v2.16b, v21.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 3 | 
|  | aese	v1.16b, v20.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 2 | 
|  | eor	v8.8b, v8.8b, v4.8b                          // GHASH block 4k - mid | 
|  | aese	v0.16b, v20.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 2 | 
|  | aese	v3.16b, v19.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 1 | 
|  | aese	v1.16b, v21.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 3 | 
|  | pmull	v10.1q, v8.1d, v10.1d                      // GHASH block 4k - mid | 
|  | pmull2	v4.1q, v5.2d, v14.2d                          // GHASH block 4k+1 - high | 
|  | pmull	v8.1q, v5.1d, v14.1d                          // GHASH block 4k+1 - low | 
|  | aese	v3.16b, v20.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 2 | 
|  | eor	v9.16b, v9.16b, v4.16b                         // GHASH block 4k+1 - high | 
|  | mov	d4, v5.d[1]                                  // GHASH block 4k+1 - mid | 
|  | aese	v0.16b, v21.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 3 | 
|  | eor	v11.16b, v11.16b, v8.16b                         // GHASH block 4k+1 - low | 
|  | aese	v3.16b, v21.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 3 | 
|  | eor	v4.8b, v4.8b, v5.8b                          // GHASH block 4k+1 - mid | 
|  | mov	d8, v6.d[1]                                  // GHASH block 4k+2 - mid | 
|  | aese	v0.16b, v22.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 4 | 
|  | rev64	v7.16b, v7.16b                                    // GHASH block 4k+3 (t0, t1, t2 and t3 free) | 
|  | aese	v3.16b, v22.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 4 | 
|  | pmull	v4.1q, v4.1d, v17.1d                          // GHASH block 4k+1 - mid | 
|  | eor	v8.8b, v8.8b, v6.8b                          // GHASH block 4k+2 - mid | 
|  | add	w12, w12, #1                            // CTR block 4k+3 | 
|  | pmull	v5.1q, v6.1d, v13.1d                          // GHASH block 4k+2 - low | 
|  | aese	v3.16b, v23.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 5 | 
|  | aese	v2.16b, v22.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 4 | 
|  | eor	v10.16b, v10.16b, v4.16b                         // GHASH block 4k+1 - mid | 
|  | pmull2	v4.1q, v6.2d, v13.2d                          // GHASH block 4k+2 - high | 
|  | eor	v11.16b, v11.16b, v5.16b                         // GHASH block 4k+2 - low | 
|  | ins	v8.d[1], v8.d[0]                                // GHASH block 4k+2 - mid | 
|  | aese	v2.16b, v23.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 5 | 
|  | eor	v9.16b, v9.16b, v4.16b                         // GHASH block 4k+2 - high | 
|  | mov	d4, v7.d[1]                                  // GHASH block 4k+3 - mid | 
|  | aese	v1.16b, v22.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 4 | 
|  | pmull2	v8.1q, v8.2d, v16.2d                          // GHASH block 4k+2 - mid | 
|  | eor	v4.8b, v4.8b, v7.8b                          // GHASH block 4k+3 - mid | 
|  | pmull2	v5.1q, v7.2d, v12.2d                          // GHASH block 4k+3 - high | 
|  | aese	v1.16b, v23.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 5 | 
|  | pmull	v4.1q, v4.1d, v16.1d                          // GHASH block 4k+3 - mid | 
|  | eor	v10.16b, v10.16b, v8.16b                         // GHASH block 4k+2 - mid | 
|  | aese	v0.16b, v23.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 5 | 
|  | aese	v1.16b, v24.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 6 | 
|  | aese	v2.16b, v24.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 6 | 
|  | aese	v0.16b, v24.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 6 | 
|  | movi	v8.8b, #0xc2 | 
|  | aese	v3.16b, v24.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 6 | 
|  | aese	v1.16b, v25.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 7 | 
|  | eor	v9.16b, v9.16b, v5.16b                         // GHASH block 4k+3 - high | 
|  | aese	v0.16b, v25.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 7 | 
|  | aese	v3.16b, v25.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 7 | 
|  | shl	d8, d8, #56               // mod_constant | 
|  | aese	v1.16b, v26.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 8 | 
|  | eor	v10.16b, v10.16b, v4.16b                         // GHASH block 4k+3 - mid | 
|  | pmull	v6.1q, v7.1d, v12.1d                          // GHASH block 4k+3 - low | 
|  | aese	v3.16b, v26.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 8 | 
|  | cmp	x17, #12                                      // setup flags for AES-128/192/256 check | 
|  | aese	v0.16b, v26.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 8 | 
|  | eor	v11.16b, v11.16b, v6.16b                         // GHASH block 4k+3 - low | 
|  | aese	v2.16b, v25.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 7 | 
|  | eor	v10.16b, v10.16b, v9.16b                         // karatsuba tidy up | 
|  | aese	v2.16b, v26.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 8 | 
|  | pmull	v4.1q, v9.1d, v8.1d | 
|  | ext	v9.16b, v9.16b, v9.16b, #8 | 
|  | eor	v10.16b, v10.16b, v11.16b | 
|  | b.lt	Lenc_finish_prepretail                           // branch if AES-128 | 
|  |  | 
|  | aese	v1.16b, v27.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 9 | 
|  | aese	v3.16b, v27.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 9 | 
|  | aese	v0.16b, v27.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 9 | 
|  | aese	v2.16b, v27.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 9 | 
|  | aese	v3.16b, v28.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 10 | 
|  | aese	v1.16b, v28.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 10 | 
|  | aese	v0.16b, v28.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 10 | 
|  | aese	v2.16b, v28.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 10 | 
|  | b.eq	Lenc_finish_prepretail                           // branch if AES-192 | 
|  |  | 
|  | aese	v1.16b, v29.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 11 | 
|  | aese	v0.16b, v29.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 11 | 
|  | aese	v3.16b, v29.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 11 | 
|  | aese	v2.16b, v29.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 11 | 
|  | aese	v1.16b, v30.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 12 | 
|  | aese	v0.16b, v30.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 12 | 
|  | aese	v3.16b, v30.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 12 | 
|  | aese	v2.16b, v30.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 12 | 
|  |  | 
|  | Lenc_finish_prepretail: | // all key-size paths rejoin here: finish the GHASH modular reduction and apply AES round N-1 to the four counter blocks
|  | eor	v10.16b, v10.16b, v4.16b | // MODULO - fold into mid (v4 = v9.1d * mod_constant, computed before the key-size branches)
|  | eor	v10.16b, v10.16b, v9.16b | // MODULO - fold into mid (v9 had its 64b halves swapped before the key-size branches)
|  | pmull	v4.1q, v10.1d, v8.1d | // MODULO - mid 64b align with low
|  | ext	v10.16b, v10.16b, v10.16b, #8 | // MODULO - other mid alignment
|  | aese	v1.16b, v31.16b                                    // AES block 4k+5 - round N-1 | 
|  | eor	v11.16b, v11.16b, v4.16b | // MODULO - fold into low
|  | aese	v3.16b, v31.16b                                    // AES block 4k+7 - round N-1 | 
|  | aese	v0.16b, v31.16b                                    // AES block 4k+4 - round N-1 | 
|  | aese	v2.16b, v31.16b                                    // AES block 4k+6 - round N-1 | 
|  | eor	v11.16b, v11.16b, v10.16b | // MODULO - fold into low
|  |  | 
|  | Lenc_tail:	//	TAIL - handles the last 1 to 4 blocks (the last may be partial); the entry point below is chosen from the number of bytes left | 
|  | ext	v8.16b, v11.16b, v11.16b, #8                     // prepare final partial tag | 
|  | sub	x5, x4, x0   // x5 (previously main_end_input_ptr) = end_input_ptr - input_ptr = number of bytes left to process | 
|  | ldp	x6, x7, [x0], #16           // AES block 4k+4 - load plaintext | 
|  | eor	x6, x6, x13                      // AES block 4k+4 - round N low | 
|  | eor	x7, x7, x14                      // AES block 4k+4 - round N high | 
|  | cmp	x5, #48 | // more than 3 blocks' worth of bytes left?
|  | fmov	d4, x6                               // AES block 4k+4 - mov low | 
|  | fmov	v4.d[1], x7                           // AES block 4k+4 - mov high | 
|  | eor	v5.16b, v4.16b, v0.16b                          // AES block 4k+4 - result | 
|  | b.gt	Lenc_blocks_more_than_3 | // yes: all four keystream blocks v0-v3 are used as they are
|  | cmp	x5, #32 | // more than 2 blocks' worth of bytes left?
|  | mov	v3.16b, v2.16b | // shift keystream down one slot: the final block is always XORed with v3
|  | movi	v11.8b, #0 | // clear GHASH low accumulator (the running tag is fed in through v8)
|  | movi	v9.8b, #0 | // clear GHASH high accumulator
|  | sub	w12, w12, #1 | // one keystream block goes unused: back the counter off by one
|  | mov	v2.16b, v1.16b | // the final-1 block is XORed with v2
|  | movi	v10.8b, #0 | // clear GHASH mid accumulator
|  | b.gt	Lenc_blocks_more_than_2 | 
|  | mov	v3.16b, v1.16b | // at most 2 blocks left: the final block takes the second keystream block (v1)
|  | sub	w12, w12, #1 | // a second keystream block goes unused
|  | cmp	x5, #16 | // more than 1 block's worth of bytes left?
|  | b.gt	Lenc_blocks_more_than_1 | 
|  | sub	w12, w12, #1 | // a third keystream block goes unused
|  | b	Lenc_blocks_less_than_1 | // single (possibly partial) block: v5 already holds its result
|  | Lenc_blocks_more_than_3:	//	blocks left >  3: store the final-3 block, seed the GHASH accumulators (v11 low, v9 high, v10 mid) from it, and encrypt the final-2 block | 
|  | st1	{ v5.16b}, [x2], #16                    // AES final-3 block - store result | 
|  | ldp	x6, x7, [x0], #16          // AES final-2 block - load input low & high | 
|  | rev64	v4.16b, v5.16b                                   // GHASH final-3 block | 
|  | eor	x6, x6, x13                     // AES final-2 block - round N low | 
|  | eor	v4.16b, v4.16b, v8.16b                          // feed in partial tag | 
|  | eor	x7, x7, x14                     // AES final-2 block - round N high | 
|  | mov	d22, v4.d[1]                                // GHASH final-3 block - mid | 
|  | fmov	d5, x6                                // AES final-2 block - mov low | 
|  | fmov	v5.d[1], x7                            // AES final-2 block - mov high | 
|  | eor	v22.8b, v22.8b, v4.8b                     // GHASH final-3 block - mid | 
|  | movi	v8.8b, #0                                       // suppress further partial tag feed in | 
|  | mov	d10, v17.d[1]                              // GHASH final-3 block - mid (multiplier taken from the upper half of v17) | 
|  | pmull	v11.1q, v4.1d, v15.1d                      // GHASH final-3 block - low | 
|  | pmull2	v9.1q, v4.2d, v15.2d                      // GHASH final-3 block - high | 
|  | pmull	v10.1q, v22.1d, v10.1d                   // GHASH final-3 block - mid | 
|  | eor	v5.16b, v5.16b, v1.16b                           // AES final-2 block - result | 
|  | Lenc_blocks_more_than_2:	//	blocks left >  2: store the final-2 block, accumulate its GHASH terms, and encrypt the final-1 block | 
|  | st1	{ v5.16b}, [x2], #16                    // AES final-2 block - store result | 
|  | ldp	x6, x7, [x0], #16          // AES final-1 block - load input low & high | 
|  | rev64	v4.16b, v5.16b                                   // GHASH final-2 block | 
|  | eor	x6, x6, x13                     // AES final-1 block - round N low | 
|  | eor	v4.16b, v4.16b, v8.16b                          // feed in partial tag | 
|  | fmov	d5, x6                                // AES final-1 block - mov low | 
|  | eor	x7, x7, x14                     // AES final-1 block - round N high | 
|  | fmov	v5.d[1], x7                            // AES final-1 block - mov high | 
|  | movi	v8.8b, #0                                       // suppress further partial tag feed in | 
|  | pmull2	v20.1q, v4.2d, v14.2d                         // GHASH final-2 block - high | 
|  | mov	d22, v4.d[1]                                // GHASH final-2 block - mid | 
|  | pmull	v21.1q, v4.1d, v14.1d                         // GHASH final-2 block - low | 
|  | eor	v22.8b, v22.8b, v4.8b                     // GHASH final-2 block - mid | 
|  | eor	v5.16b, v5.16b, v2.16b                           // AES final-1 block - result | 
|  | eor	v9.16b, v9.16b, v20.16b                           // GHASH final-2 block - high | 
|  | pmull	v22.1q, v22.1d, v17.1d                     // GHASH final-2 block - mid | 
|  | eor	v11.16b, v11.16b, v21.16b                           // GHASH final-2 block - low | 
|  | eor	v10.16b, v10.16b, v22.16b                      // GHASH final-2 block - mid | 
|  | Lenc_blocks_more_than_1:	//	blocks left >  1: store the final-1 block, accumulate its GHASH terms, and encrypt the final block | 
|  | st1	{ v5.16b}, [x2], #16                    // AES final-1 block - store result | 
|  | rev64	v4.16b, v5.16b                                   // GHASH final-1 block | 
|  | ldp	x6, x7, [x0], #16          // AES final block - load input low & high | 
|  | eor	v4.16b, v4.16b, v8.16b                          // feed in partial tag | 
|  | movi	v8.8b, #0                                       // suppress further partial tag feed in | 
|  | eor	x6, x6, x13                     // AES final block - round N low | 
|  | mov	d22, v4.d[1]                                // GHASH final-1 block - mid | 
|  | pmull2	v20.1q, v4.2d, v13.2d                         // GHASH final-1 block - high | 
|  | eor	x7, x7, x14                     // AES final block - round N high | 
|  | eor	v22.8b, v22.8b, v4.8b                     // GHASH final-1 block - mid | 
|  | eor	v9.16b, v9.16b, v20.16b                           // GHASH final-1 block - high | 
|  | ins	v22.d[1], v22.d[0]                           // GHASH final-1 block - mid (copy to the upper half so pmull2 can use the upper half of v16) | 
|  | fmov	d5, x6                                // AES final block - mov low | 
|  | fmov	v5.d[1], x7                            // AES final block - mov high | 
|  | pmull2	v22.1q, v22.2d, v16.2d                     // GHASH final-1 block - mid | 
|  | pmull	v21.1q, v4.1d, v13.1d                         // GHASH final-1 block - low | 
|  | eor	v5.16b, v5.16b, v3.16b                           // AES final block - result | 
|  | eor	v10.16b, v10.16b, v22.16b                      // GHASH final-1 block - mid | 
|  | eor	v11.16b, v11.16b, v21.16b                           // GHASH final-1 block - low | 
|  | Lenc_blocks_less_than_1:	//	blocks left <= 1: mask and store the last (possibly partial) block, finish GHASH and its reduction, write back the counter and the hash value, then return | 
|  | and	x1, x1, #127                   // bit_length %= 128 | 
|  | mvn	x13, xzr                                      // rkN_l = 0xffffffffffffffff | 
|  | sub	x1, x1, #128                   // bit_length -= 128 | 
|  | neg	x1, x1                         // bit_length = 128 - #bits in input (in range [1,128]) | 
|  | ld1	{ v18.16b}, [x2]                           // load existing bytes where the possibly partial last block is to be stored | 
|  | mvn	x14, xzr                                      // rkN_h = 0xffffffffffffffff | 
|  | and	x1, x1, #127                   // bit_length %= 128 | 
|  | lsr	x14, x14, x1                      // rkN_h is mask for top 64b of last block | 
|  | cmp	x1, #64 | // does the masked-off region fit entirely in the upper 64b?
|  | csel	x6, x13, x14, lt | // low 64b mask: all ones when fewer than 64 bits are masked off, otherwise the shifted mask
|  | csel	x7, x14, xzr, lt | // high 64b mask: the shifted mask when fewer than 64 bits are masked off, otherwise zero
|  | fmov	d0, x6                                // ctr0b is mask for last block | 
|  | fmov	v0.d[1], x7 | // upper half of the mask
|  | and	v5.16b, v5.16b, v0.16b                           // possibly partial last block has zeroes in highest bits | 
|  | rev64	v4.16b, v5.16b                                   // GHASH final block | 
|  | eor	v4.16b, v4.16b, v8.16b                          // feed in partial tag | 
|  | bif	v5.16b, v18.16b, v0.16b                             // insert existing bytes in top end of result before storing | 
|  | pmull2	v20.1q, v4.2d, v12.2d                         // GHASH final block - high | 
|  | mov	d8, v4.d[1]                                 // GHASH final block - mid | 
|  | rev	w9, w12 | // byte-swap the working counter back to the byte order used in the counter block
|  | pmull	v21.1q, v4.1d, v12.1d                         // GHASH final block - low | 
|  | eor	v9.16b, v9.16b, v20.16b                           // GHASH final block - high | 
|  | eor	v8.8b, v8.8b, v4.8b                         // GHASH final block - mid | 
|  | pmull	v8.1q, v8.1d, v16.1d                         // GHASH final block - mid | 
|  | eor	v11.16b, v11.16b, v21.16b                           // GHASH final block - low | 
|  | eor	v10.16b, v10.16b, v8.16b                        // GHASH final block - mid | 
|  | movi	v8.8b, #0xc2 | // 0xc2 in each of the low 8 bytes; shifted into the top byte below
|  | eor	v4.16b, v11.16b, v9.16b                        // MODULO - karatsuba tidy up | 
|  | shl	d8, d8, #56              // mod_constant | 
|  | eor	v10.16b, v10.16b, v4.16b                        // MODULO - karatsuba tidy up | 
|  | pmull	v7.1q, v9.1d, v8.1d           // MODULO - top 64b align with mid | 
|  | ext	v9.16b, v9.16b, v9.16b, #8                    // MODULO - other top alignment | 
|  | eor	v10.16b, v10.16b, v7.16b                     // MODULO - fold into mid | 
|  | eor	v10.16b, v10.16b, v9.16b                        // MODULO - fold into mid | 
|  | pmull	v9.1q, v10.1d, v8.1d           // MODULO - mid 64b align with low | 
|  | ext	v10.16b, v10.16b, v10.16b, #8                    // MODULO - other mid alignment | 
|  | str	w9, [x16, #12]                         // store the updated counter | 
|  | st1	{ v5.16b}, [x2]                         // store all 16B | 
|  | eor	v11.16b, v11.16b, v9.16b                        // MODULO - fold into low | 
|  | eor	v11.16b, v11.16b, v10.16b                        // MODULO - fold into low | 
|  | ext	v11.16b, v11.16b, v11.16b, #8 | // swap the two 64b halves of the reduced hash value
|  | rev64	v11.16b, v11.16b | // reverse the bytes within each 64b half
|  | mov	x0, x15 | // return value: byte_len, kept in x15 since entry
|  | st1	{ v11.16b }, [x3] | // store the updated hash value through x3
|  | ldp	x19, x20, [sp, #16] | // restore callee-saved x19/x20
|  | ldp	x21, x22, [sp, #32] | // restore callee-saved x21/x22
|  | ldp	x23, x24, [sp, #48] | // restore callee-saved x23/x24
|  | ldp	d8, d9, [sp, #64] | // restore d8-d15 (low halves of v8-v15)
|  | ldp	d10, d11, [sp, #80] | 
|  | ldp	d12, d13, [sp, #96] | 
|  | ldp	d14, d15, [sp, #112] | 
|  | ldp	x29, x30, [sp], #128 | // restore frame pointer and link register and release the 128-byte frame
|  | AARCH64_VALIDATE_LINK_REGISTER | 
|  | ret | 
|  |  | 
|  | .globl	aes_gcm_dec_kernel | 
|  |  | 
|  | .def aes_gcm_dec_kernel | 
|  | .type 32 | 
|  | .endef | 
|  | .align	4 | 
|  | aes_gcm_dec_kernel: | // entry section: save callee-saved registers, load round keys and hash-key powers, build counter blocks 0-3 and run the AES rounds that precede round N-1 on them
|  | AARCH64_SIGN_LINK_REGISTER | 
|  | stp	x29, x30, [sp, #-128]! | // allocate a 128-byte frame and save x29/x30 at its base
|  | mov	x29, sp | // frame pointer = new stack pointer
|  | stp	x19, x20, [sp, #16] | // save callee-saved x19/x20
|  | mov	x16, x4 | // x16 = x4: pointer to the 16-byte counter block read below
|  | mov	x8, x5 | // x8 = x5: base pointer for the round keys (rk0, rk1, ...) loaded below
|  | stp	x21, x22, [sp, #32] | // save callee-saved x21/x22
|  | stp	x23, x24, [sp, #48] | // save callee-saved x23/x24
|  | stp	d8, d9, [sp, #64] | // save d8-d15 (low halves of v8-v15), which are overwritten below
|  | stp	d10, d11, [sp, #80] | 
|  | stp	d12, d13, [sp, #96] | 
|  | stp	d14, d15, [sp, #112] | 
|  | ldr	w17, [x8, #240] | // w17 = word at offset 240 from the key pointer; it locates the last round key and selects the AES-128/192/256 path below (presumably the round count - confirm against the key struct)
|  | add	x19, x8, x17, lsl #4                   // borrow input_l1 for last key | 
|  | ldp	x13, x14, [x19]                       // load round N keys | 
|  | ldr	q31, [x19, #-16]                        // load round N-1 keys | 
|  | lsr	x5, x1, #3              // byte_len (x1 holds the length in bits) | 
|  | mov	x15, x5 | // keep byte_len in x15
|  | ldp	x10, x11, [x16]              // ctr96_b64, ctr96_t32 | 
|  | ldr	q26, [x8, #128]                                // load rk8 | 
|  | sub	x5, x5, #1      // byte_len - 1 | 
|  | ldr	q25, [x8, #112]                                // load rk7 | 
|  | and	x5, x5, #0xffffffffffffffc0 // number of bytes to be processed in main loop (at least 1 byte must be handled by tail) | 
|  | add	x4, x0, x1, lsr #3   // end_input_ptr | 
|  | ldr	q24, [x8, #96]                                 // load rk6 | 
|  | lsr	x12, x11, #32 | // x12 = upper 32 bits of x11: the 32-bit counter word, still in stored byte order
|  | ldr	q23, [x8, #80]                                 // load rk5 | 
|  | orr	w11, w11, w11 | // writing w11 clears the upper 32 bits of x11 so a counter word can be ORed in there
|  | ldr	q21, [x8, #48]                                 // load rk3 | 
|  | add	x5, x5, x0 | // x5 = input pointer at which the main loop must stop
|  | rev	w12, w12                                // rev_ctr32 | 
|  | add	w12, w12, #1                            // increment rev_ctr32 | 
|  | fmov	d3, x10                               // CTR block 3 | 
|  | rev	w9, w12                                 // CTR block 1 | 
|  | add	w12, w12, #1                            // CTR block 1 | 
|  | fmov	d1, x10                               // CTR block 1 | 
|  | orr	x9, x11, x9, lsl #32            // CTR block 1 | 
|  | ld1	{ v0.16b}, [x16]                             // special case vector load initial counter so we can start first AES block as quickly as possible | 
|  | fmov	v1.d[1], x9                               // CTR block 1 | 
|  | rev	w9, w12                                 // CTR block 2 | 
|  | add	w12, w12, #1                            // CTR block 2 | 
|  | fmov	d2, x10                               // CTR block 2 | 
|  | orr	x9, x11, x9, lsl #32            // CTR block 2 | 
|  | fmov	v2.d[1], x9                               // CTR block 2 | 
|  | rev	w9, w12                                 // CTR block 3 | 
|  | orr	x9, x11, x9, lsl #32            // CTR block 3 | 
|  | ldr	q18, [x8, #0]                                  // load rk0 | 
|  | fmov	v3.d[1], x9                               // CTR block 3 | 
|  | add	w12, w12, #1                            // CTR block 3 | 
|  | ldr	q22, [x8, #64]                                 // load rk4 | 
|  | ldr	q19, [x8, #16]                                 // load rk1 | 
|  | aese	v0.16b, v18.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 0 - round 0 | 
|  | ldr	q14, [x6, #48]                              // load h3l | h3h | 
|  | ext	v14.16b, v14.16b, v14.16b, #8 | // swap the two 64b halves
|  | aese	v3.16b, v18.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 3 - round 0 | 
|  | ldr	q15, [x6, #80]                              // load h4l | h4h | 
|  | ext	v15.16b, v15.16b, v15.16b, #8 | // swap the two 64b halves
|  | aese	v1.16b, v18.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 1 - round 0 | 
|  | ldr	q13, [x6, #32]                              // load h2l | h2h | 
|  | ext	v13.16b, v13.16b, v13.16b, #8 | // swap the two 64b halves
|  | aese	v2.16b, v18.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 2 - round 0 | 
|  | ldr	q20, [x8, #32]                                 // load rk2 | 
|  | aese	v0.16b, v19.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 0 - round 1 | 
|  | aese	v1.16b, v19.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 1 - round 1 | 
|  | ld1	{ v11.16b}, [x3] | // load the 16-byte running hash value from [x3] into v11
|  | ext	v11.16b, v11.16b, v11.16b, #8 | // swap its two 64b halves
|  | rev64	v11.16b, v11.16b | // reverse the bytes within each 64b half
|  | aese	v2.16b, v19.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 2 - round 1 | 
|  | ldr	q27, [x8, #144]                                // load rk9 | 
|  | aese	v3.16b, v19.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 3 - round 1 | 
|  | ldr	q30, [x8, #192]                               // load rk12 | 
|  | aese	v0.16b, v20.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 0 - round 2 | 
|  | ldr	q12, [x6]                                   // load h1l | h1h | 
|  | ext	v12.16b, v12.16b, v12.16b, #8 | // swap the two 64b halves
|  | aese	v2.16b, v20.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 2 - round 2 | 
|  | ldr	q28, [x8, #160]                               // load rk10 | 
|  | aese	v3.16b, v20.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 3 - round 2 | 
|  | aese	v0.16b, v21.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 0 - round 3 | 
|  | aese	v1.16b, v20.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 1 - round 2 | 
|  | aese	v3.16b, v21.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 3 - round 3 | 
|  | aese	v0.16b, v22.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 0 - round 4 | 
|  | aese	v2.16b, v21.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 2 - round 3 | 
|  | aese	v1.16b, v21.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 1 - round 3 | 
|  | aese	v3.16b, v22.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 3 - round 4 | 
|  | aese	v2.16b, v22.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 2 - round 4 | 
|  | aese	v1.16b, v22.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 1 - round 4 | 
|  | aese	v3.16b, v23.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 3 - round 5 | 
|  | aese	v0.16b, v23.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 0 - round 5 | 
|  | aese	v1.16b, v23.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 1 - round 5 | 
|  | aese	v2.16b, v23.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 2 - round 5 | 
|  | aese	v0.16b, v24.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 0 - round 6 | 
|  | aese	v3.16b, v24.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 3 - round 6 | 
|  | cmp	x17, #12                                      // setup flags for AES-128/192/256 check | 
|  | aese	v1.16b, v24.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 1 - round 6 | 
|  | aese	v2.16b, v24.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 2 - round 6 | 
|  | aese	v0.16b, v25.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 0 - round 7 | 
|  | aese	v1.16b, v25.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 1 - round 7 | 
|  | aese	v3.16b, v25.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 3 - round 7 | 
|  | aese	v0.16b, v26.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 0 - round 8 | 
|  | aese	v2.16b, v25.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 2 - round 7 | 
|  | aese	v3.16b, v26.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 3 - round 8 | 
|  | aese	v1.16b, v26.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 1 - round 8 | 
|  | ldr	q29, [x8, #176]                               // load rk11 | 
|  | aese	v2.16b, v26.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 2 - round 8 | 
|  | b.lt	Ldec_finish_first_blocks                         // branch if AES-128 | 
|  |  | 
|  | aese	v0.16b, v27.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 0 - round 9 | 
|  | aese	v1.16b, v27.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 1 - round 9 | 
|  | aese	v3.16b, v27.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 3 - round 9 | 
|  | aese	v2.16b, v27.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 2 - round 9 | 
|  | aese	v0.16b, v28.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 0 - round 10 | 
|  | aese	v1.16b, v28.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 1 - round 10 | 
|  | aese	v3.16b, v28.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 3 - round 10 | 
|  | aese	v2.16b, v28.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 2 - round 10 | 
|  | b.eq	Ldec_finish_first_blocks                         // branch if AES-192 | 
|  |  | 
|  | aese	v0.16b, v29.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 0 - round 11 | 
|  | aese	v3.16b, v29.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 3 - round 11 | 
|  | aese	v1.16b, v29.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 1 - round 11 | 
|  | aese	v2.16b, v29.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 2 - round 11 | 
|  | aese	v1.16b, v30.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 1 - round 12 | 
|  | aese	v0.16b, v30.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 0 - round 12 | 
|  | aese	v2.16b, v30.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 2 - round 12 | 
|  | aese	v3.16b, v30.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 3 - round 12 | 
|  |  | 
|  | Ldec_finish_first_blocks: | // all key-size paths rejoin here: apply AES round N-1 to blocks 0-3, derive the h1k-h4k terms (XOR of the two 64b halves of each hash-key power), then either branch to the tail or decrypt and store blocks 0 and 1
|  | cmp	x0, x5                   // check if we have <= 4 blocks | 
|  | trn1	v9.2d, v14.2d,    v15.2d                      // h4h | h3h | 
|  | trn2	v17.2d,  v14.2d,    v15.2d                      // h4l | h3l | 
|  | trn1	v8.2d,    v12.2d,    v13.2d                      // h2h | h1h | 
|  | trn2	v16.2d,  v12.2d,    v13.2d                      // h2l | h1l | 
|  | eor	v17.16b, v17.16b, v9.16b                  // h4k | h3k | 
|  | aese	v1.16b, v31.16b                                    // AES block 1 - round N-1 | 
|  | aese	v2.16b, v31.16b                                    // AES block 2 - round N-1 | 
|  | eor	v16.16b, v16.16b, v8.16b                     // h2k | h1k | 
|  | aese	v3.16b, v31.16b                                    // AES block 3 - round N-1 | 
|  | aese	v0.16b, v31.16b                                    // AES block 0 - round N-1 | 
|  | b.ge	Ldec_tail                                        // handle tail | 
|  |  | 
|  | ldr	q4, [x0, #0]                          // AES block 0 - load ciphertext | 
|  | ldr	q5, [x0, #16]                         // AES block 1 - load ciphertext | 
|  | rev	w9, w12                                 // CTR block 4 | 
|  | eor	v0.16b, v4.16b, v0.16b                            // AES block 0 - result | 
|  | eor	v1.16b, v5.16b, v1.16b                            // AES block 1 - result | 
|  | rev64	v5.16b, v5.16b                                    // GHASH block 1 | 
|  | ldr	q7, [x0, #48]                         // AES block 3 - load ciphertext | 
|  | mov	x7, v0.d[1]                            // AES block 0 - mov high | 
|  | mov	x6, v0.d[0]                            // AES block 0 - mov low | 
|  | rev64	v4.16b, v4.16b                                    // GHASH block 0 | 
|  | add	w12, w12, #1                            // CTR block 4 | 
|  | fmov	d0, x10                               // CTR block 4 | 
|  | orr	x9, x11, x9, lsl #32            // CTR block 4 | 
|  | fmov	v0.d[1], x9                               // CTR block 4 | 
|  | rev	w9, w12                                 // CTR block 5 | 
|  | add	w12, w12, #1                            // CTR block 5 | 
|  | mov	x19, v1.d[0]                            // AES block 1 - mov low | 
|  | orr	x9, x11, x9, lsl #32            // CTR block 5 | 
|  | mov	x20, v1.d[1]                            // AES block 1 - mov high | 
|  | eor	x7, x7, x14                    // AES block 0 - round N high | 
|  | eor	x6, x6, x13                    // AES block 0 - round N low | 
|  | stp	x6, x7, [x2], #16        // AES block 0 - store result | 
|  | fmov	d1, x10                               // CTR block 5 | 
|  | ldr	q6, [x0, #32]                         // AES block 2 - load ciphertext | 
|  | add	x0, x0, #64                       // AES input_ptr update | 
|  | fmov	v1.d[1], x9                               // CTR block 5 | 
|  | rev	w9, w12                                 // CTR block 6 | 
|  | add	w12, w12, #1                            // CTR block 6 | 
|  | eor	x19, x19, x13                    // AES block 1 - round N low | 
|  | orr	x9, x11, x9, lsl #32            // CTR block 6 | 
|  | eor	x20, x20, x14                    // AES block 1 - round N high | 
|  | stp	x19, x20, [x2], #16        // AES block 1 - store result | 
|  | eor	v2.16b, v6.16b, v2.16b                            // AES block 2 - result | 
|  | cmp	x0, x5                   // check if we have <= 8 blocks | 
|  | b.ge	Ldec_prepretail                                  // do prepretail | 
|  |  | 
|  | Ldec_main_loop:	//	main loop start | 
|  | mov	x21, v2.d[0]                            // AES block 4k+2 - mov low | 
|  | ext	v11.16b, v11.16b, v11.16b, #8                     // PRE 0 | 
|  | eor	v3.16b, v7.16b, v3.16b                            // AES block 4k+3 - result | 
|  | aese	v0.16b, v18.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 0 | 
|  | mov	x22, v2.d[1]                            // AES block 4k+2 - mov high | 
|  | aese	v1.16b, v18.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 0 | 
|  | fmov	d2, x10                               // CTR block 4k+6 | 
|  | fmov	v2.d[1], x9                               // CTR block 4k+6 | 
|  | eor	v4.16b, v4.16b, v11.16b                           // PRE 1 | 
|  | rev	w9, w12                                 // CTR block 4k+7 | 
|  | aese	v0.16b, v19.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 1 | 
|  | mov	x24, v3.d[1]                            // AES block 4k+3 - mov high | 
|  | aese	v1.16b, v19.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 1 | 
|  | mov	x23, v3.d[0]                            // AES block 4k+3 - mov low | 
|  | pmull2	v9.1q, v4.2d, v15.2d                       // GHASH block 4k - high | 
|  | mov	d8, v4.d[1]                                  // GHASH block 4k - mid | 
|  | fmov	d3, x10                               // CTR block 4k+7 | 
|  | aese	v0.16b, v20.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 2 | 
|  | orr	x9, x11, x9, lsl #32            // CTR block 4k+7 | 
|  | aese	v2.16b, v18.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 0 | 
|  | fmov	v3.d[1], x9                               // CTR block 4k+7 | 
|  | aese	v1.16b, v20.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 2 | 
|  | eor	v8.8b, v8.8b, v4.8b                          // GHASH block 4k - mid | 
|  | aese	v0.16b, v21.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 3 | 
|  | eor	x22, x22, x14                    // AES block 4k+2 - round N high | 
|  | aese	v2.16b, v19.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 1 | 
|  | mov	d10, v17.d[1]                               // GHASH block 4k - mid | 
|  | aese	v1.16b, v21.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 3 | 
|  | rev64	v6.16b, v6.16b                                    // GHASH block 4k+2 | 
|  | aese	v3.16b, v18.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 0 | 
|  | eor	x21, x21, x13                    // AES block 4k+2 - round N low | 
|  | aese	v2.16b, v20.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 2 | 
|  | stp	x21, x22, [x2], #16        // AES block 4k+2 - store result | 
|  | pmull	v11.1q, v4.1d, v15.1d                       // GHASH block 4k - low | 
|  | pmull2	v4.1q, v5.2d, v14.2d                          // GHASH block 4k+1 - high | 
|  | aese	v2.16b, v21.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 3 | 
|  | rev64	v7.16b, v7.16b                                    // GHASH block 4k+3 | 
|  | pmull	v10.1q, v8.1d, v10.1d                      // GHASH block 4k - mid | 
|  | eor	x23, x23, x13                    // AES block 4k+3 - round N low | 
|  | pmull	v8.1q, v5.1d, v14.1d                          // GHASH block 4k+1 - low | 
|  | eor	x24, x24, x14                    // AES block 4k+3 - round N high | 
|  | eor	v9.16b, v9.16b, v4.16b                         // GHASH block 4k+1 - high | 
|  | aese	v2.16b, v22.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 4 | 
|  | aese	v3.16b, v19.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 1 | 
|  | mov	d4, v5.d[1]                                  // GHASH block 4k+1 - mid | 
|  | aese	v0.16b, v22.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 4 | 
|  | eor	v11.16b, v11.16b, v8.16b                         // GHASH block 4k+1 - low | 
|  | aese	v2.16b, v23.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 5 | 
|  | add	w12, w12, #1                            // CTR block 4k+7 | 
|  | aese	v3.16b, v20.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 2 | 
|  | mov	d8, v6.d[1]                                  // GHASH block 4k+2 - mid | 
|  | aese	v1.16b, v22.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 4 | 
|  | eor	v4.8b, v4.8b, v5.8b                          // GHASH block 4k+1 - mid | 
|  | pmull	v5.1q, v6.1d, v13.1d                          // GHASH block 4k+2 - low | 
|  | aese	v3.16b, v21.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 3 | 
|  | eor	v8.8b, v8.8b, v6.8b                          // GHASH block 4k+2 - mid | 
|  | aese	v1.16b, v23.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 5 | 
|  | aese	v0.16b, v23.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 5 | 
|  | eor	v11.16b, v11.16b, v5.16b                         // GHASH block 4k+2 - low | 
|  | pmull	v4.1q, v4.1d, v17.1d                          // GHASH block 4k+1 - mid | 
|  | rev	w9, w12                                 // CTR block 4k+8 | 
|  | aese	v1.16b, v24.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 6 | 
|  | ins	v8.d[1], v8.d[0]                                // GHASH block 4k+2 - mid | 
|  | aese	v0.16b, v24.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 6 | 
|  | add	w12, w12, #1                            // CTR block 4k+8 | 
|  | aese	v3.16b, v22.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 4 | 
|  | aese	v1.16b, v25.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 7 | 
|  | eor	v10.16b, v10.16b, v4.16b                         // GHASH block 4k+1 - mid | 
|  | aese	v0.16b, v25.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 7 | 
|  | pmull2	v4.1q, v6.2d, v13.2d                          // GHASH block 4k+2 - high | 
|  | mov	d6, v7.d[1]                                  // GHASH block 4k+3 - mid | 
|  | aese	v3.16b, v23.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 5 | 
|  | pmull2	v8.1q, v8.2d, v16.2d                          // GHASH block 4k+2 - mid | 
|  | aese	v0.16b, v26.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 8 | 
|  | eor	v9.16b, v9.16b, v4.16b                         // GHASH block 4k+2 - high | 
|  | aese	v3.16b, v24.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 6 | 
|  | pmull	v4.1q, v7.1d, v12.1d                          // GHASH block 4k+3 - low | 
|  | orr	x9, x11, x9, lsl #32            // CTR block 4k+8 | 
|  | eor	v10.16b, v10.16b, v8.16b                         // GHASH block 4k+2 - mid | 
|  | pmull2	v5.1q, v7.2d, v12.2d                          // GHASH block 4k+3 - high | 
|  | cmp	x17, #12                                      // setup flags for AES-128/192/256 check | 
|  | eor	v6.8b, v6.8b, v7.8b                          // GHASH block 4k+3 - mid | 
|  | aese	v1.16b, v26.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 8 | 
|  | aese	v2.16b, v24.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 6 | 
|  | eor	v9.16b, v9.16b, v5.16b                         // GHASH block 4k+3 - high | 
|  | pmull	v6.1q, v6.1d, v16.1d                          // GHASH block 4k+3 - mid | 
|  | movi	v8.8b, #0xc2 | 
|  | aese	v2.16b, v25.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 7 | 
|  | eor	v11.16b, v11.16b, v4.16b                         // GHASH block 4k+3 - low | 
|  | aese	v3.16b, v25.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 7 | 
|  | shl	d8, d8, #56               // mod_constant | 
|  | aese	v2.16b, v26.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 8 | 
|  | eor	v10.16b, v10.16b, v6.16b                         // GHASH block 4k+3 - mid | 
|  | aese	v3.16b, v26.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 8 | 
|  | b.lt	Ldec_main_loop_continue                          // branch if AES-128 | 
|  |  | 
|  | aese	v0.16b, v27.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 9 | 
|  | aese	v2.16b, v27.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 9 | 
|  | aese	v1.16b, v27.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 9 | 
|  | aese	v3.16b, v27.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 9 | 
|  | aese	v0.16b, v28.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 10 | 
|  | aese	v1.16b, v28.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 10 | 
|  | aese	v2.16b, v28.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 10 | 
|  | aese	v3.16b, v28.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 10 | 
|  | b.eq	Ldec_main_loop_continue                          // branch if AES-192 | 
|  |  | 
|  | aese	v0.16b, v29.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 11 | 
|  | aese	v1.16b, v29.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 11 | 
|  | aese	v2.16b, v29.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 11 | 
|  | aese	v3.16b, v29.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 11 | 
|  | aese	v0.16b, v30.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 12 | 
|  | aese	v1.16b, v30.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 12 | 
|  | aese	v2.16b, v30.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 12 | 
|  | aese	v3.16b, v30.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 12 | 
|  |  | 
|  | Ldec_main_loop_continue: | 
|  | pmull	v7.1q, v9.1d, v8.1d            // MODULO - top 64b align with mid | 
|  | eor	v6.16b, v11.16b, v9.16b                         // MODULO - karatsuba tidy up | 
|  | ldr	q4, [x0, #0]                          // AES block 4k+4 - load ciphertext | 
|  | aese	v0.16b, v31.16b                                    // AES block 4k+4 - round N-1 | 
|  | ext	v9.16b, v9.16b, v9.16b, #8                     // MODULO - other top alignment | 
|  | eor	v10.16b, v10.16b, v6.16b                         // MODULO - karatsuba tidy up | 
|  | ldr	q5, [x0, #16]                         // AES block 4k+5 - load ciphertext | 
|  | eor	v0.16b, v4.16b, v0.16b                            // AES block 4k+4 - result | 
|  | stp	x23, x24, [x2], #16        // AES block 4k+3 - store result | 
|  | eor	v10.16b, v10.16b, v7.16b                      // MODULO - fold into mid | 
|  | ldr	q7, [x0, #48]                         // AES block 4k+7 - load ciphertext | 
|  | ldr	q6, [x0, #32]                         // AES block 4k+6 - load ciphertext | 
|  | mov	x7, v0.d[1]                            // AES block 4k+4 - mov high | 
|  | eor	v10.16b, v10.16b, v9.16b                         // MODULO - fold into mid | 
|  | aese	v1.16b, v31.16b                                    // AES block 4k+5 - round N-1 | 
|  | add	x0, x0, #64                       // AES input_ptr update | 
|  | mov	x6, v0.d[0]                            // AES block 4k+4 - mov low | 
|  | fmov	d0, x10                               // CTR block 4k+8 | 
|  | fmov	v0.d[1], x9                               // CTR block 4k+8 | 
|  | pmull	v8.1q, v10.1d, v8.1d     // MODULO - mid 64b align with low | 
|  | eor	v1.16b, v5.16b, v1.16b                            // AES block 4k+5 - result | 
|  | rev	w9, w12                                 // CTR block 4k+9 | 
|  | aese	v2.16b, v31.16b                                    // AES block 4k+6 - round N-1 | 
|  | orr	x9, x11, x9, lsl #32            // CTR block 4k+9 | 
|  | cmp	x0, x5                   // LOOP CONTROL | 
|  | add	w12, w12, #1                            // CTR block 4k+9 | 
|  | eor	x6, x6, x13                    // AES block 4k+4 - round N low | 
|  | eor	x7, x7, x14                    // AES block 4k+4 - round N high | 
|  | mov	x20, v1.d[1]                            // AES block 4k+5 - mov high | 
|  | eor	v2.16b, v6.16b, v2.16b                            // AES block 4k+6 - result | 
|  | eor	v11.16b, v11.16b, v8.16b               // MODULO - fold into low | 
|  | mov	x19, v1.d[0]                            // AES block 4k+5 - mov low | 
|  | fmov	d1, x10                               // CTR block 4k+9 | 
|  | ext	v10.16b, v10.16b, v10.16b, #8                     // MODULO - other mid alignment | 
|  | fmov	v1.d[1], x9                               // CTR block 4k+9 | 
|  | rev	w9, w12                                 // CTR block 4k+10 | 
|  | add	w12, w12, #1                            // CTR block 4k+10 | 
|  | aese	v3.16b, v31.16b                                    // AES block 4k+7 - round N-1 | 
|  | orr	x9, x11, x9, lsl #32            // CTR block 4k+10 | 
|  | rev64	v5.16b, v5.16b                                    // GHASH block 4k+5 | 
|  | eor	x20, x20, x14                    // AES block 4k+5 - round N high | 
|  | stp	x6, x7, [x2], #16        // AES block 4k+4 - store result | 
|  | eor	x19, x19, x13                    // AES block 4k+5 - round N low | 
|  | stp	x19, x20, [x2], #16        // AES block 4k+5 - store result | 
|  | rev64	v4.16b, v4.16b                                    // GHASH block 4k+4 | 
|  | eor	v11.16b, v11.16b, v10.16b                         // MODULO - fold into low | 
|  | b.lt	Ldec_main_loop | 
|  |  | 
|  | Ldec_prepretail:	//	PREPRETAIL - GHASH the already loaded ciphertext blocks 4k..4k+3 (v4-v7) while running the AES rounds up to round 8 (10/12 for longer keys) on counter blocks 4k+4..4k+7 (v0-v3); nothing is loaded from the input here | 
|  | ext	v11.16b, v11.16b, v11.16b, #8                     // PRE 0 - swap the 64b halves of the running hash in v11 | 
|  | mov	x21, v2.d[0]                            // AES block 4k+2 - mov low | 
|  | eor	v3.16b, v7.16b, v3.16b                            // AES block 4k+3 - result | 
|  | aese	v0.16b, v18.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 0 | 
|  | mov	x22, v2.d[1]                            // AES block 4k+2 - mov high | 
|  | aese	v1.16b, v18.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 0 | 
|  | fmov	d2, x10                               // CTR block 4k+6 | 
|  | fmov	v2.d[1], x9                               // CTR block 4k+6 | 
|  | rev	w9, w12                                 // CTR block 4k+7 | 
|  | eor	v4.16b, v4.16b, v11.16b                           // PRE 1 - fold the running hash into GHASH block 4k | 
|  | rev64	v6.16b, v6.16b                                    // GHASH block 4k+2 | 
|  | orr	x9, x11, x9, lsl #32            // CTR block 4k+7 | 
|  | mov	x23, v3.d[0]                            // AES block 4k+3 - mov low | 
|  | aese	v1.16b, v19.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 1 | 
|  | mov	x24, v3.d[1]                            // AES block 4k+3 - mov high | 
|  | pmull	v11.1q, v4.1d, v15.1d                       // GHASH block 4k - low | 
|  | mov	d8, v4.d[1]                                  // GHASH block 4k - mid | 
|  | fmov	d3, x10                               // CTR block 4k+7 | 
|  | pmull2	v9.1q, v4.2d, v15.2d                       // GHASH block 4k - high | 
|  | fmov	v3.d[1], x9                               // CTR block 4k+7 | 
|  | aese	v2.16b, v18.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 0 | 
|  | mov	d10, v17.d[1]                               // GHASH block 4k - mid (upper 64b of v17 is the mid-term multiplier consumed by the pmull into v10 below) | 
|  | aese	v0.16b, v19.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 1 | 
|  | eor	v8.8b, v8.8b, v4.8b                          // GHASH block 4k - mid | 
|  | pmull2	v4.1q, v5.2d, v14.2d                          // GHASH block 4k+1 - high | 
|  | aese	v2.16b, v19.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 1 | 
|  | rev64	v7.16b, v7.16b                                    // GHASH block 4k+3 | 
|  | aese	v3.16b, v18.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 0 | 
|  | pmull	v10.1q, v8.1d, v10.1d                      // GHASH block 4k - mid | 
|  | eor	v9.16b, v9.16b, v4.16b                         // GHASH block 4k+1 - high | 
|  | pmull	v8.1q, v5.1d, v14.1d                          // GHASH block 4k+1 - low | 
|  | aese	v3.16b, v19.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 1 | 
|  | mov	d4, v5.d[1]                                  // GHASH block 4k+1 - mid | 
|  | aese	v0.16b, v20.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 2 | 
|  | aese	v1.16b, v20.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 2 | 
|  | eor	v11.16b, v11.16b, v8.16b                         // GHASH block 4k+1 - low | 
|  | aese	v2.16b, v20.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 2 | 
|  | aese	v0.16b, v21.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 3 | 
|  | mov	d8, v6.d[1]                                  // GHASH block 4k+2 - mid | 
|  | aese	v3.16b, v20.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 2 | 
|  | eor	v4.8b, v4.8b, v5.8b                          // GHASH block 4k+1 - mid | 
|  | pmull	v5.1q, v6.1d, v13.1d                          // GHASH block 4k+2 - low | 
|  | aese	v0.16b, v22.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 4 | 
|  | aese	v3.16b, v21.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 3 | 
|  | eor	v8.8b, v8.8b, v6.8b                          // GHASH block 4k+2 - mid | 
|  | pmull	v4.1q, v4.1d, v17.1d                          // GHASH block 4k+1 - mid | 
|  | aese	v0.16b, v23.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 5 | 
|  | eor	v11.16b, v11.16b, v5.16b                         // GHASH block 4k+2 - low | 
|  | aese	v3.16b, v22.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 4 | 
|  | pmull2	v5.1q, v7.2d, v12.2d                          // GHASH block 4k+3 - high | 
|  | eor	v10.16b, v10.16b, v4.16b                         // GHASH block 4k+1 - mid | 
|  | pmull2	v4.1q, v6.2d, v13.2d                          // GHASH block 4k+2 - high | 
|  | aese	v3.16b, v23.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 5 | 
|  | ins	v8.d[1], v8.d[0]                                // GHASH block 4k+2 - mid | 
|  | aese	v2.16b, v21.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 3 | 
|  | aese	v1.16b, v21.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 3 | 
|  | eor	v9.16b, v9.16b, v4.16b                         // GHASH block 4k+2 - high | 
|  | pmull	v4.1q, v7.1d, v12.1d                          // GHASH block 4k+3 - low | 
|  | aese	v2.16b, v22.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 4 | 
|  | mov	d6, v7.d[1]                                  // GHASH block 4k+3 - mid | 
|  | aese	v1.16b, v22.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 4 | 
|  | pmull2	v8.1q, v8.2d, v16.2d                          // GHASH block 4k+2 - mid | 
|  | aese	v2.16b, v23.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 5 | 
|  | eor	v6.8b, v6.8b, v7.8b                          // GHASH block 4k+3 - mid | 
|  | aese	v1.16b, v23.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 5 | 
|  | aese	v3.16b, v24.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 6 | 
|  | eor	v10.16b, v10.16b, v8.16b                         // GHASH block 4k+2 - mid | 
|  | aese	v2.16b, v24.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 6 | 
|  | aese	v0.16b, v24.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 6 | 
|  | movi	v8.8b, #0xc2 | 
|  | aese	v1.16b, v24.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 6 | 
|  | eor	v11.16b, v11.16b, v4.16b                         // GHASH block 4k+3 - low | 
|  | pmull	v6.1q, v6.1d, v16.1d                          // GHASH block 4k+3 - mid | 
|  | aese	v3.16b, v25.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 7 | 
|  | cmp	x17, #12                                      // setup flags for AES-128/192/256 check (lt: AES-128, eq: AES-192, otherwise AES-256); the flags stay live until the b.lt/b.eq below because only SIMD instructions follow | 
|  | eor	v9.16b, v9.16b, v5.16b                         // GHASH block 4k+3 - high | 
|  | aese	v1.16b, v25.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 7 | 
|  | aese	v0.16b, v25.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 7 | 
|  | eor	v10.16b, v10.16b, v6.16b                         // GHASH block 4k+3 - mid | 
|  | aese	v3.16b, v26.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 8 | 
|  | aese	v2.16b, v25.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 7 | 
|  | eor	v6.16b, v11.16b, v9.16b                         // MODULO - karatsuba tidy up (v6 = low ^ high, consumed after the key-size branches) | 
|  | aese	v1.16b, v26.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 8 | 
|  | aese	v0.16b, v26.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 8 | 
|  | shl	d8, d8, #56               // mod_constant: low 64b lane of v8 becomes 0xc2 << 56 (every byte was set to 0xc2 by the movi above) | 
|  | aese	v2.16b, v26.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 8 | 
|  | b.lt	Ldec_finish_prepretail                           // branch if AES-128 | 
|  |  | 
|  | aese	v1.16b, v27.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 9 | 
|  | aese	v2.16b, v27.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 9 | 
|  | aese	v3.16b, v27.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 9 | 
|  | aese	v0.16b, v27.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 9 | 
|  | aese	v2.16b, v28.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 10 | 
|  | aese	v3.16b, v28.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 10 | 
|  | aese	v0.16b, v28.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 10 | 
|  | aese	v1.16b, v28.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 10 | 
|  | b.eq	Ldec_finish_prepretail                           // branch if AES-192 | 
|  |  | 
|  | aese	v2.16b, v29.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 11 | 
|  | aese	v0.16b, v29.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 11 | 
|  | aese	v1.16b, v29.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 11 | 
|  | aese	v2.16b, v30.16b | 
|  | aesmc	v2.16b, v2.16b          // AES block 4k+6 - round 12 | 
|  | aese	v3.16b, v29.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 11 | 
|  | aese	v1.16b, v30.16b | 
|  | aesmc	v1.16b, v1.16b          // AES block 4k+5 - round 12 | 
|  | aese	v0.16b, v30.16b | 
|  | aesmc	v0.16b, v0.16b          // AES block 4k+4 - round 12 | 
|  | aese	v3.16b, v30.16b | 
|  | aesmc	v3.16b, v3.16b          // AES block 4k+7 - round 12 | 
|  |  | 
|  | Ldec_finish_prepretail: | 
|  | eor	v10.16b, v10.16b, v6.16b                         // MODULO - karatsuba tidy up (all key sizes rejoin here: reduce the GHASH accumulators into v11, store plaintext blocks 4k+2/4k+3 and apply AES round N-1 to v0-v3) | 
|  | pmull	v7.1q, v9.1d, v8.1d            // MODULO - top 64b align with mid (v8 holds mod_constant) | 
|  | ext	v9.16b, v9.16b, v9.16b, #8                     // MODULO - other top alignment | 
|  | eor	v10.16b, v10.16b, v7.16b                      // MODULO - fold into mid | 
|  | eor	x22, x22, x14                    // AES block 4k+2 - round N high | 
|  | eor	x23, x23, x13                    // AES block 4k+3 - round N low | 
|  | eor	v10.16b, v10.16b, v9.16b                         // MODULO - fold into mid | 
|  | add	w12, w12, #1                            // CTR block 4k+7 | 
|  | eor	x21, x21, x13                    // AES block 4k+2 - round N low | 
|  | pmull	v8.1q, v10.1d, v8.1d     // MODULO - mid 64b align with low | 
|  | eor	x24, x24, x14                    // AES block 4k+3 - round N high | 
|  | stp	x21, x22, [x2], #16        // AES block 4k+2 - store result | 
|  | ext	v10.16b, v10.16b, v10.16b, #8                     // MODULO - other mid alignment | 
|  | stp	x23, x24, [x2], #16        // AES block 4k+3 - store result | 
|  |  | 
|  | eor	v11.16b, v11.16b, v8.16b               // MODULO - fold into low | 
|  | aese	v1.16b, v31.16b                                    // AES block 4k+5 - round N-1 (the round N key is XORed in later from x13/x14, after the ciphertext XOR) | 
|  | aese	v0.16b, v31.16b                                    // AES block 4k+4 - round N-1 | 
|  | aese	v3.16b, v31.16b                                    // AES block 4k+7 - round N-1 | 
|  | aese	v2.16b, v31.16b                                    // AES block 4k+6 - round N-1 | 
|  | eor	v11.16b, v11.16b, v10.16b                         // MODULO - fold into low (v11 now holds the reduced running hash) | 
|  |  | 
|  | Ldec_tail:	//	TAIL - decrypt the first remaining block, then dispatch on the bytes left (> 48, > 32, > 16); every fall-through decrements the counter w12 by one and moves the prepared AES states down (v2->v3 and v1->v2, then v1->v3), and the first fall-through also zeroes the GHASH accumulators v9/v10/v11 | 
|  | sub	x5, x4, x0   // x5 (main_end_input_ptr until now) is reused: x4 - x0 = number of bytes left to process | 
|  | ld1	{ v5.16b}, [x0], #16                      // AES block 4k+4 - load ciphertext | 
|  | eor	v0.16b, v5.16b, v0.16b                            // AES block 4k+4 - result | 
|  | mov	x6, v0.d[0]                            // AES block 4k+4 - mov low | 
|  | mov	x7, v0.d[1]                            // AES block 4k+4 - mov high | 
|  | ext	v8.16b, v11.16b, v11.16b, #8                     // prepare final partial tag (running hash v11 with its 64b halves swapped) | 
|  | cmp	x5, #48 | 
|  | eor	x6, x6, x13                    // AES block 4k+4 - round N low | 
|  | eor	x7, x7, x14                    // AES block 4k+4 - round N high | 
|  | b.gt	Ldec_blocks_more_than_3 | 
|  | sub	w12, w12, #1 | 
|  | mov	v3.16b, v2.16b | 
|  | movi	v10.8b, #0 | 
|  | movi	v11.8b, #0 | 
|  | cmp	x5, #32 | 
|  | movi	v9.8b, #0 | 
|  | mov	v2.16b, v1.16b | 
|  | b.gt	Ldec_blocks_more_than_2 | 
|  | sub	w12, w12, #1 | 
|  | mov	v3.16b, v1.16b | 
|  | cmp	x5, #16 | 
|  | b.gt	Ldec_blocks_more_than_1 | 
|  | sub	w12, w12, #1 | 
|  | b	Ldec_blocks_less_than_1 | 
|  | Ldec_blocks_more_than_3:	//	blocks left >  3 - store the final-3 plaintext, GHASH the final-3 ciphertext block (this initialises accumulators v9/v10/v11) and decrypt the final-2 block with the AES state in v1 | 
|  | rev64	v4.16b, v5.16b                                   // GHASH final-3 block | 
|  | ld1	{ v5.16b}, [x0], #16                     // AES final-2 block - load ciphertext | 
|  | stp	x6, x7, [x2], #16       // AES final-3 block  - store result | 
|  | mov	d10, v17.d[1]                              // GHASH final-3 block - mid (upper 64b of v17 is the multiplier for the pmull into v10 below) | 
|  | eor	v4.16b, v4.16b, v8.16b                          // feed in partial tag | 
|  | eor	v0.16b, v5.16b, v1.16b                           // AES final-2 block - result | 
|  | mov	d22, v4.d[1]                                // GHASH final-3 block - mid | 
|  | mov	x6, v0.d[0]                           // AES final-2 block - mov low | 
|  | mov	x7, v0.d[1]                           // AES final-2 block - mov high | 
|  | eor	v22.8b, v22.8b, v4.8b                     // GHASH final-3 block - mid | 
|  | movi	v8.8b, #0                                       // suppress further partial tag feed in (the later eor with v8 becomes a no-op) | 
|  | pmull2	v9.1q, v4.2d, v15.2d                      // GHASH final-3 block - high | 
|  | pmull	v10.1q, v22.1d, v10.1d                   // GHASH final-3 block - mid | 
|  | eor	x6, x6, x13                   // AES final-2 block - round N low | 
|  | pmull	v11.1q, v4.1d, v15.1d                      // GHASH final-3 block - low | 
|  | eor	x7, x7, x14                   // AES final-2 block - round N high | 
|  | Ldec_blocks_more_than_2:	//	blocks left >  2 - store the final-2 plaintext, accumulate GHASH of the final-2 ciphertext block into v9/v10/v11 and decrypt the final-1 block with the AES state in v2 | 
|  | rev64	v4.16b, v5.16b                                   // GHASH final-2 block | 
|  | ld1	{ v5.16b}, [x0], #16                     // AES final-1 block - load ciphertext | 
|  | eor	v4.16b, v4.16b, v8.16b                          // feed in partial tag (v8 is zero if an earlier block already consumed it) | 
|  | stp	x6, x7, [x2], #16       // AES final-2 block  - store result | 
|  | eor	v0.16b, v5.16b, v2.16b                           // AES final-1 block - result | 
|  | mov	d22, v4.d[1]                                // GHASH final-2 block - mid | 
|  | pmull	v21.1q, v4.1d, v14.1d                         // GHASH final-2 block - low | 
|  | pmull2	v20.1q, v4.2d, v14.2d                         // GHASH final-2 block - high | 
|  | eor	v22.8b, v22.8b, v4.8b                     // GHASH final-2 block - mid | 
|  | mov	x6, v0.d[0]                           // AES final-1 block - mov low | 
|  | mov	x7, v0.d[1]                           // AES final-1 block - mov high | 
|  | eor	v11.16b, v11.16b, v21.16b                           // GHASH final-2 block - low | 
|  | movi	v8.8b, #0                                       // suppress further partial tag feed in | 
|  | pmull	v22.1q, v22.1d, v17.1d                     // GHASH final-2 block - mid | 
|  | eor	v9.16b, v9.16b, v20.16b                           // GHASH final-2 block - high | 
|  | eor	x6, x6, x13                   // AES final-1 block - round N low | 
|  | eor	v10.16b, v10.16b, v22.16b                      // GHASH final-2 block - mid | 
|  | eor	x7, x7, x14                   // AES final-1 block - round N high | 
|  | Ldec_blocks_more_than_1:	//	blocks left >  1 - store the final-1 plaintext, accumulate GHASH of the final-1 ciphertext block and decrypt the final block with the AES state in v3 (its plaintext stays in x6/x7 and is stored after masking) | 
|  | stp	x6, x7, [x2], #16       // AES final-1 block  - store result | 
|  | rev64	v4.16b, v5.16b                                   // GHASH final-1 block | 
|  | ld1	{ v5.16b}, [x0], #16                     // AES final block - load ciphertext | 
|  | eor	v4.16b, v4.16b, v8.16b                          // feed in partial tag (v8 is zero if an earlier block already consumed it) | 
|  | movi	v8.8b, #0                                       // suppress further partial tag feed in | 
|  | mov	d22, v4.d[1]                                // GHASH final-1 block - mid | 
|  | eor	v0.16b, v5.16b, v3.16b                           // AES final block - result | 
|  | pmull2	v20.1q, v4.2d, v13.2d                         // GHASH final-1 block - high | 
|  | eor	v22.8b, v22.8b, v4.8b                     // GHASH final-1 block - mid | 
|  | pmull	v21.1q, v4.1d, v13.1d                         // GHASH final-1 block - low | 
|  | mov	x6, v0.d[0]                           // AES final block - mov low | 
|  | ins	v22.d[1], v22.d[0]                           // GHASH final-1 block - mid (copy to the upper lane so pmull2 can pair it with the upper 64b of v16) | 
|  | mov	x7, v0.d[1]                           // AES final block - mov high | 
|  | pmull2	v22.1q, v22.2d, v16.2d                     // GHASH final-1 block - mid | 
|  | eor	x6, x6, x13                   // AES final block - round N low | 
|  | eor	v11.16b, v11.16b, v21.16b                           // GHASH final-1 block - low | 
|  | eor	v9.16b, v9.16b, v20.16b                           // GHASH final-1 block - high | 
|  | eor	v10.16b, v10.16b, v22.16b                      // GHASH final-1 block - mid | 
|  | eor	x7, x7, x14                   // AES final block - round N high | 
|  | Ldec_blocks_less_than_1:	//	blocks left <= 1 - mask the possibly partial final block, merge its plaintext with the bytes already present at [x2], finish and reduce GHASH, then write back the counter word and the tag, restore the saved registers and return x15 in x0 | 
|  | and	x1, x1, #127                   // bit_length %= 128 | 
|  | mvn	x14, xzr                                      // rkN_h = 0xffffffffffffffff (the round N key registers x13/x14 are reused from here on to build the mask) | 
|  | sub	x1, x1, #128                   // bit_length -= 128 | 
|  | mvn	x13, xzr                                      // rkN_l = 0xffffffffffffffff | 
|  | ldp	x4, x5, [x2] // load the existing output bytes that must not be overwritten | 
|  | neg	x1, x1                         // bit_length = 128 - #bits in input (in range [1,128]) | 
|  | and	x1, x1, #127                   // bit_length %= 128 (128 wraps to 0, i.e. a full final block) | 
|  | lsr	x14, x14, x1                      // rkN_h is mask for top 64b of last block (a register shift uses the amount modulo 64) | 
|  | cmp	x1, #64 | 
|  | csel	x9, x13, x14, lt | 
|  | csel	x10, x14, xzr, lt | 
|  | fmov	d0, x9                                  // ctr0b is mask for last block: x9 = low 64b mask, x10 = high 64b mask (x1 < 64: low = all ones, high = x14; otherwise low = x14, high = 0) | 
|  | and	x6, x6, x9 | 
|  | mov	v0.d[1], x10 | 
|  | bic	x4, x4, x9          // mask out low existing bytes (keeps only the bits outside the mask; merged into x6 below) | 
|  | rev	w9, w12 | 
|  | bic	x5, x5, x10      // mask out high existing bytes (keeps only the bits outside the mask; merged into x7 below) | 
|  | orr	x6, x6, x4 | 
|  | and	x7, x7, x10 | 
|  | orr	x7, x7, x5 | 
|  | and	v5.16b, v5.16b, v0.16b                            // possibly partial last block has zeroes in highest bits (the ciphertext is masked before it is hashed) | 
|  | rev64	v4.16b, v5.16b                                    // GHASH final block | 
|  | eor	v4.16b, v4.16b, v8.16b                           // feed in partial tag (v8 is zero if an earlier block already consumed it) | 
|  | pmull	v21.1q, v4.1d, v12.1d                          // GHASH final block - low | 
|  | mov	d8, v4.d[1]                                  // GHASH final block - mid | 
|  | eor	v8.8b, v8.8b, v4.8b                          // GHASH final block - mid | 
|  | pmull2	v20.1q, v4.2d, v12.2d                          // GHASH final block - high | 
|  | pmull	v8.1q, v8.1d, v16.1d                          // GHASH final block - mid | 
|  | eor	v9.16b, v9.16b, v20.16b                            // GHASH final block - high | 
|  | eor	v11.16b, v11.16b, v21.16b                            // GHASH final block - low | 
|  | eor	v10.16b, v10.16b, v8.16b                         // GHASH final block - mid | 
|  | movi	v8.8b, #0xc2 | 
|  | eor	v6.16b, v11.16b, v9.16b                         // MODULO - karatsuba tidy up | 
|  | shl	d8, d8, #56               // mod_constant: low 64b lane of v8 becomes 0xc2 << 56 | 
|  | eor	v10.16b, v10.16b, v6.16b                         // MODULO - karatsuba tidy up | 
|  | pmull	v7.1q, v9.1d, v8.1d            // MODULO - top 64b align with mid | 
|  | ext	v9.16b, v9.16b, v9.16b, #8                     // MODULO - other top alignment | 
|  | eor	v10.16b, v10.16b, v7.16b                      // MODULO - fold into mid | 
|  | eor	v10.16b, v10.16b, v9.16b                         // MODULO - fold into mid | 
|  | pmull	v8.1q, v10.1d, v8.1d     // MODULO - mid 64b align with low | 
|  | ext	v10.16b, v10.16b, v10.16b, #8                     // MODULO - other mid alignment | 
|  | eor	v11.16b, v11.16b, v8.16b               // MODULO - fold into low | 
|  | stp	x6, x7, [x2] | 
|  | str	w9, [x16, #12]                          // store the updated counter (w9 = byte-reversed w12) at byte offset 12 from x16 | 
|  | eor	v11.16b, v11.16b, v10.16b                         // MODULO - fold into low (v11 is the final hash; it is half-swapped and byte-reversed below before being stored to [x3]) | 
|  | ext	v11.16b, v11.16b, v11.16b, #8 | 
|  | rev64	v11.16b, v11.16b | 
|  | mov	x0, x15 | 
|  | st1	{ v11.16b }, [x3] | 
|  | ldp	x19, x20, [sp, #16] | 
|  | ldp	x21, x22, [sp, #32] | 
|  | ldp	x23, x24, [sp, #48] | 
|  | ldp	d8, d9, [sp, #64] | 
|  | ldp	d10, d11, [sp, #80] | 
|  | ldp	d12, d13, [sp, #96] | 
|  | ldp	d14, d15, [sp, #112] | 
|  | ldp	x29, x30, [sp], #128 | 
|  | AARCH64_VALIDATE_LINK_REGISTER | 
|  | ret | 
|  |  | 
|  | #endif | 
|  | #endif  // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(_WIN32) |