blob: 0247a2d21147c1a50f5b525b6b2cffdd96c0aba6 [file] [log] [blame]
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
3
4#include <openssl/asm_base.h>
5
6#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
7.text
// _aes_hw_encrypt: encrypt a single 16-byte block with AES-NI.
//   In:  %rdi -> input block (16 bytes read)
//        %rsi -> output block (16 bytes written)
//        %rdx -> key schedule: round keys start at offset 0 and a 32-bit
//                round count is read from offset 240
//   Clobbers: %eax, %rdx, flags, %xmm0-%xmm2 (all three are zeroed on exit).
// NOTE(review): the C prototype is presumably (in, out, key) -- confirm
// against the declaring header.
David Benjaminfe0c91e2024-03-18 15:37:24 +10008.globl _aes_hw_encrypt
9.private_extern _aes_hw_encrypt
10
11.p2align 4
12_aes_hw_encrypt:
13
14_CET_ENDBR
15#ifdef BORINGSSL_DISPATCH_TEST
16
// Dispatch-test builds only: flag that this entry point ran by setting
// byte 1 of _BORINGSSL_function_hit.
17 movb $1,_BORINGSSL_function_hit+1(%rip)
18#endif
// xmm2 = input block, eax = round count, xmm0 = round key 0,
// xmm1 = round key 1; rdx is advanced to round key 2.
19 movups (%rdi),%xmm2
20 movl 240(%rdx),%eax
21 movups (%rdx),%xmm0
22 movups 16(%rdx),%xmm1
23 leaq 32(%rdx),%rdx
// Initial whitening: state ^= round key 0.
24 xorps %xmm0,%xmm2
// One aesenc per iteration, %eax iterations; the next round key is fetched
// while the counter is tested.
25L$oop_enc1_1:
// Encodes: aesenc %xmm1,%xmm2
26.byte 102,15,56,220,209
27 decl %eax
// movups and leaq do not modify flags, so jnz still sees the decl result.
28 movups (%rdx),%xmm1
29 leaq 16(%rdx),%rdx
30 jnz L$oop_enc1_1
// Encodes: aesenclast %xmm1,%xmm2 (final round, using the key loaded last).
31.byte 102,15,56,221,209
// Scrub round keys from registers, store the result, then scrub the state.
32 pxor %xmm0,%xmm0
33 pxor %xmm1,%xmm1
34 movups %xmm2,(%rsi)
35 pxor %xmm2,%xmm2
36 ret
37
38
39
// _aes_hw_decrypt: decrypt a single 16-byte block with AES-NI.
//   In:  %rdi -> input block (16 bytes read)
//        %rsi -> output block (16 bytes written)
//        %rdx -> key schedule: round keys start at offset 0 and a 32-bit
//                round count is read from offset 240
//   Clobbers: %eax, %rdx, flags, %xmm0-%xmm2 (all three are zeroed on exit).
// Same structure as _aes_hw_encrypt, with aesdec/aesdeclast in place of
// aesenc/aesenclast.
40.globl _aes_hw_decrypt
41.private_extern _aes_hw_decrypt
42
43.p2align 4
44_aes_hw_decrypt:
45
46_CET_ENDBR
// xmm2 = input block, eax = round count, xmm0 = round key 0,
// xmm1 = round key 1; rdx is advanced to round key 2.
47 movups (%rdi),%xmm2
48 movl 240(%rdx),%eax
49 movups (%rdx),%xmm0
50 movups 16(%rdx),%xmm1
51 leaq 32(%rdx),%rdx
// Initial whitening: state ^= round key 0.
52 xorps %xmm0,%xmm2
53L$oop_dec1_2:
// Encodes: aesdec %xmm1,%xmm2
54.byte 102,15,56,222,209
55 decl %eax
// movups and leaq do not modify flags, so jnz still sees the decl result.
56 movups (%rdx),%xmm1
57 leaq 16(%rdx),%rdx
58 jnz L$oop_dec1_2
// Encodes: aesdeclast %xmm1,%xmm2 (final round).
59.byte 102,15,56,223,209
// Scrub round keys from registers, store the result, then scrub the state.
60 pxor %xmm0,%xmm0
61 pxor %xmm1,%xmm1
62 movups %xmm2,(%rsi)
63 pxor %xmm2,%xmm2
64 ret
65
66
67
// _aesni_encrypt2: file-local helper (no .globl) that encrypts two blocks
// in parallel. Uses a private register convention, not the C ABI.
//   In:  %rcx -> key schedule, %eax = round count,
//        %xmm2, %xmm3 = input blocks
//   Out: %xmm2, %xmm3 = encrypted blocks
//   Clobbers: %rax, %rcx, %xmm0, %xmm1, flags.
// The loop consumes two round keys per iteration while %rax steps by 32 from
// 16-16*rounds up to zero, so it only terminates for an odd round count.
68.p2align 4
69_aesni_encrypt2:
70
// xmm0 = round key 0, xmm1 = round key 1; eax becomes rounds*16 (a byte offset).
71 movups (%rcx),%xmm0
72 shll $4,%eax
73 movups 16(%rcx),%xmm1
// Whitening with round key 0, then xmm0 is reloaded with round key 2.
74 xorps %xmm0,%xmm2
75 xorps %xmm0,%xmm3
76 movups 32(%rcx),%xmm0
// rcx = key + 32 + rounds*16; rax = 16 - rounds*16 is a negative index that
// counts up toward zero.
77 leaq 32(%rcx,%rax,1),%rcx
78 negq %rax
79 addq $16,%rax
80
81L$enc_loop2:
// aesenc %xmm1,%xmm2 / aesenc %xmm1,%xmm3
82.byte 102,15,56,220,209
83.byte 102,15,56,220,217
84 movups (%rcx,%rax,1),%xmm1
// This addq sets the flags consumed by jnz; the AES and movups instructions
// in between leave flags untouched.
85 addq $32,%rax
// aesenc %xmm0,%xmm2 / aesenc %xmm0,%xmm3
86.byte 102,15,56,220,208
87.byte 102,15,56,220,216
88 movups -16(%rcx,%rax,1),%xmm0
89 jnz L$enc_loop2
90
// Last aesenc (key in xmm1) followed by aesenclast (key in xmm0) on both blocks.
91.byte 102,15,56,220,209
92.byte 102,15,56,220,217
93.byte 102,15,56,221,208
94.byte 102,15,56,221,216
95 ret
96
97
98
// _aesni_decrypt2: file-local helper (no .globl) that decrypts two blocks
// in parallel. Uses a private register convention, not the C ABI.
//   In:  %rcx -> key schedule, %eax = round count,
//        %xmm2, %xmm3 = input blocks
//   Out: %xmm2, %xmm3 = decrypted blocks
//   Clobbers: %rax, %rcx, %xmm0, %xmm1, flags.
// Mirrors _aesni_encrypt2 with aesdec/aesdeclast.
99.p2align 4
100_aesni_decrypt2:
101
// xmm0 = round key 0, xmm1 = round key 1; eax becomes rounds*16 (a byte offset).
102 movups (%rcx),%xmm0
103 shll $4,%eax
104 movups 16(%rcx),%xmm1
// Whitening with round key 0, then xmm0 is reloaded with round key 2.
105 xorps %xmm0,%xmm2
106 xorps %xmm0,%xmm3
107 movups 32(%rcx),%xmm0
// rcx = key + 32 + rounds*16; rax = 16 - rounds*16 counts up toward zero.
108 leaq 32(%rcx,%rax,1),%rcx
109 negq %rax
110 addq $16,%rax
111
112L$dec_loop2:
// aesdec %xmm1,%xmm2 / aesdec %xmm1,%xmm3
113.byte 102,15,56,222,209
114.byte 102,15,56,222,217
115 movups (%rcx,%rax,1),%xmm1
// Flags from this addq drive the jnz below.
116 addq $32,%rax
// aesdec %xmm0,%xmm2 / aesdec %xmm0,%xmm3
117.byte 102,15,56,222,208
118.byte 102,15,56,222,216
119 movups -16(%rcx,%rax,1),%xmm0
120 jnz L$dec_loop2
121
// Last aesdec (key in xmm1) followed by aesdeclast (key in xmm0) on both blocks.
122.byte 102,15,56,222,209
123.byte 102,15,56,222,217
124.byte 102,15,56,223,208
125.byte 102,15,56,223,216
126 ret
127
128
129
// _aesni_encrypt3: file-local helper (no .globl) that encrypts three blocks
// in parallel. Uses a private register convention, not the C ABI.
//   In:  %rcx -> key schedule, %eax = round count,
//        %xmm2-%xmm4 = input blocks
//   Out: %xmm2-%xmm4 = encrypted blocks
//   Clobbers: %rax, %rcx, %xmm0, %xmm1, flags.
130.p2align 4
131_aesni_encrypt3:
132
// xmm0 = round key 0, xmm1 = round key 1; eax becomes rounds*16.
133 movups (%rcx),%xmm0
134 shll $4,%eax
135 movups 16(%rcx),%xmm1
// Whitening with round key 0, then xmm0 is reloaded with round key 2.
136 xorps %xmm0,%xmm2
137 xorps %xmm0,%xmm3
138 xorps %xmm0,%xmm4
139 movups 32(%rcx),%xmm0
// rcx = key + 32 + rounds*16; rax = 16 - rounds*16 counts up toward zero.
140 leaq 32(%rcx,%rax,1),%rcx
141 negq %rax
142 addq $16,%rax
143
144L$enc_loop3:
// aesenc %xmm1 into xmm2, xmm3, xmm4
145.byte 102,15,56,220,209
146.byte 102,15,56,220,217
147.byte 102,15,56,220,225
148 movups (%rcx,%rax,1),%xmm1
// Flags from this addq drive the jnz below.
149 addq $32,%rax
// aesenc %xmm0 into xmm2, xmm3, xmm4
150.byte 102,15,56,220,208
151.byte 102,15,56,220,216
152.byte 102,15,56,220,224
153 movups -16(%rcx,%rax,1),%xmm0
154 jnz L$enc_loop3
155
// Last aesenc (xmm1) then aesenclast (xmm0) on all three blocks.
156.byte 102,15,56,220,209
157.byte 102,15,56,220,217
158.byte 102,15,56,220,225
159.byte 102,15,56,221,208
160.byte 102,15,56,221,216
161.byte 102,15,56,221,224
162 ret
163
164
165
// _aesni_decrypt3: file-local helper (no .globl) that decrypts three blocks
// in parallel. Uses a private register convention, not the C ABI.
//   In:  %rcx -> key schedule, %eax = round count,
//        %xmm2-%xmm4 = input blocks
//   Out: %xmm2-%xmm4 = decrypted blocks
//   Clobbers: %rax, %rcx, %xmm0, %xmm1, flags.
166.p2align 4
167_aesni_decrypt3:
168
// xmm0 = round key 0, xmm1 = round key 1; eax becomes rounds*16.
169 movups (%rcx),%xmm0
170 shll $4,%eax
171 movups 16(%rcx),%xmm1
// Whitening with round key 0, then xmm0 is reloaded with round key 2.
172 xorps %xmm0,%xmm2
173 xorps %xmm0,%xmm3
174 xorps %xmm0,%xmm4
175 movups 32(%rcx),%xmm0
// rcx = key + 32 + rounds*16; rax = 16 - rounds*16 counts up toward zero.
176 leaq 32(%rcx,%rax,1),%rcx
177 negq %rax
178 addq $16,%rax
179
180L$dec_loop3:
// aesdec %xmm1 into xmm2, xmm3, xmm4
181.byte 102,15,56,222,209
182.byte 102,15,56,222,217
183.byte 102,15,56,222,225
184 movups (%rcx,%rax,1),%xmm1
// Flags from this addq drive the jnz below.
185 addq $32,%rax
// aesdec %xmm0 into xmm2, xmm3, xmm4
186.byte 102,15,56,222,208
187.byte 102,15,56,222,216
188.byte 102,15,56,222,224
189 movups -16(%rcx,%rax,1),%xmm0
190 jnz L$dec_loop3
191
// Last aesdec (xmm1) then aesdeclast (xmm0) on all three blocks.
192.byte 102,15,56,222,209
193.byte 102,15,56,222,217
194.byte 102,15,56,222,225
195.byte 102,15,56,223,208
196.byte 102,15,56,223,216
197.byte 102,15,56,223,224
198 ret
199
200
201
// _aesni_encrypt4: file-local helper (no .globl) that encrypts four blocks
// in parallel. Uses a private register convention, not the C ABI.
//   In:  %rcx -> key schedule, %eax = round count,
//        %xmm2-%xmm5 = input blocks
//   Out: %xmm2-%xmm5 = encrypted blocks
//   Clobbers: %rax, %rcx, %xmm0, %xmm1, flags.
202.p2align 4
203_aesni_encrypt4:
204
// xmm0 = round key 0, xmm1 = round key 1; eax becomes rounds*16.
205 movups (%rcx),%xmm0
206 shll $4,%eax
207 movups 16(%rcx),%xmm1
// Whitening with round key 0, then xmm0 is reloaded with round key 2.
208 xorps %xmm0,%xmm2
209 xorps %xmm0,%xmm3
210 xorps %xmm0,%xmm4
211 xorps %xmm0,%xmm5
212 movups 32(%rcx),%xmm0
// rcx = key + 32 + rounds*16; rax = 16 - rounds*16 counts up toward zero.
213 leaq 32(%rcx,%rax,1),%rcx
214 negq %rax
// 0f 1f 00 is a 3-byte nop (nopl (%rax)); presumably padding for loop alignment.
215.byte 0x0f,0x1f,0x00
216 addq $16,%rax
217
218L$enc_loop4:
// aesenc %xmm1 into xmm2..xmm5
219.byte 102,15,56,220,209
220.byte 102,15,56,220,217
221.byte 102,15,56,220,225
222.byte 102,15,56,220,233
223 movups (%rcx,%rax,1),%xmm1
// Flags from this addq drive the jnz below.
224 addq $32,%rax
// aesenc %xmm0 into xmm2..xmm5
225.byte 102,15,56,220,208
226.byte 102,15,56,220,216
227.byte 102,15,56,220,224
228.byte 102,15,56,220,232
229 movups -16(%rcx,%rax,1),%xmm0
230 jnz L$enc_loop4
231
// Last aesenc (xmm1) then aesenclast (xmm0) on all four blocks.
232.byte 102,15,56,220,209
233.byte 102,15,56,220,217
234.byte 102,15,56,220,225
235.byte 102,15,56,220,233
236.byte 102,15,56,221,208
237.byte 102,15,56,221,216
238.byte 102,15,56,221,224
239.byte 102,15,56,221,232
240 ret
241
242
243
// _aesni_decrypt4: file-local helper (no .globl) that decrypts four blocks
// in parallel. Uses a private register convention, not the C ABI.
//   In:  %rcx -> key schedule, %eax = round count,
//        %xmm2-%xmm5 = input blocks
//   Out: %xmm2-%xmm5 = decrypted blocks
//   Clobbers: %rax, %rcx, %xmm0, %xmm1, flags.
244.p2align 4
245_aesni_decrypt4:
246
// xmm0 = round key 0, xmm1 = round key 1; eax becomes rounds*16.
247 movups (%rcx),%xmm0
248 shll $4,%eax
249 movups 16(%rcx),%xmm1
// Whitening with round key 0, then xmm0 is reloaded with round key 2.
250 xorps %xmm0,%xmm2
251 xorps %xmm0,%xmm3
252 xorps %xmm0,%xmm4
253 xorps %xmm0,%xmm5
254 movups 32(%rcx),%xmm0
// rcx = key + 32 + rounds*16; rax = 16 - rounds*16 counts up toward zero.
255 leaq 32(%rcx,%rax,1),%rcx
256 negq %rax
// 0f 1f 00 is a 3-byte nop (nopl (%rax)); presumably padding for loop alignment.
257.byte 0x0f,0x1f,0x00
258 addq $16,%rax
259
260L$dec_loop4:
// aesdec %xmm1 into xmm2..xmm5
261.byte 102,15,56,222,209
262.byte 102,15,56,222,217
263.byte 102,15,56,222,225
264.byte 102,15,56,222,233
265 movups (%rcx,%rax,1),%xmm1
// Flags from this addq drive the jnz below.
266 addq $32,%rax
// aesdec %xmm0 into xmm2..xmm5
267.byte 102,15,56,222,208
268.byte 102,15,56,222,216
269.byte 102,15,56,222,224
270.byte 102,15,56,222,232
271 movups -16(%rcx,%rax,1),%xmm0
272 jnz L$dec_loop4
273
// Last aesdec (xmm1) then aesdeclast (xmm0) on all four blocks.
274.byte 102,15,56,222,209
275.byte 102,15,56,222,217
276.byte 102,15,56,222,225
277.byte 102,15,56,222,233
278.byte 102,15,56,223,208
279.byte 102,15,56,223,216
280.byte 102,15,56,223,224
281.byte 102,15,56,223,232
282 ret
283
284
285
// _aesni_encrypt6: file-local helper (no .globl) that encrypts six blocks
// in parallel. Uses a private register convention, not the C ABI.
//   In:  %rcx -> key schedule, %eax = round count,
//        %xmm2-%xmm7 = input blocks
//   Out: %xmm2-%xmm7 = encrypted blocks
//   Clobbers: %rax, %rcx, %xmm0, %xmm1, flags.
// The prologue interleaves the round-key-0 whitening with the first aesenc
// round of xmm2..xmm4, then jumps into the middle of the loop so xmm5..xmm7
// catch up with that first round.
286.p2align 4
287_aesni_encrypt6:
288
// xmm0 = round key 0, xmm1 = round key 1; eax becomes rounds*16.
289 movups (%rcx),%xmm0
290 shll $4,%eax
291 movups 16(%rcx),%xmm1
292 xorps %xmm0,%xmm2
293 pxor %xmm0,%xmm3
294 pxor %xmm0,%xmm4
// aesenc %xmm1,%xmm2 (round 1 starts while the other blocks are still being whitened)
295.byte 102,15,56,220,209
// rcx = key + 32 + rounds*16; rax = -rounds*16.
296 leaq 32(%rcx,%rax,1),%rcx
297 negq %rax
// aesenc %xmm1,%xmm3
298.byte 102,15,56,220,217
299 pxor %xmm0,%xmm5
300 pxor %xmm0,%xmm6
// aesenc %xmm1,%xmm4
301.byte 102,15,56,220,225
302 pxor %xmm0,%xmm7
// With rax = -rounds*16 this address is key + 32, i.e. round key 2.
303 movups (%rcx,%rax,1),%xmm0
304 addq $16,%rax
305 jmp L$enc_loop6_enter
306.p2align 4
307L$enc_loop6:
// aesenc %xmm1 into xmm2..xmm4
308.byte 102,15,56,220,209
309.byte 102,15,56,220,217
310.byte 102,15,56,220,225
311L$enc_loop6_enter:
// aesenc %xmm1 into xmm5..xmm7
312.byte 102,15,56,220,233
313.byte 102,15,56,220,241
314.byte 102,15,56,220,249
315 movups (%rcx,%rax,1),%xmm1
// Flags from this addq drive the jnz below.
316 addq $32,%rax
// aesenc %xmm0 into xmm2..xmm7
317.byte 102,15,56,220,208
318.byte 102,15,56,220,216
319.byte 102,15,56,220,224
320.byte 102,15,56,220,232
321.byte 102,15,56,220,240
322.byte 102,15,56,220,248
323 movups -16(%rcx,%rax,1),%xmm0
324 jnz L$enc_loop6
325
// Last aesenc (xmm1) then aesenclast (xmm0) on all six blocks.
326.byte 102,15,56,220,209
327.byte 102,15,56,220,217
328.byte 102,15,56,220,225
329.byte 102,15,56,220,233
330.byte 102,15,56,220,241
331.byte 102,15,56,220,249
332.byte 102,15,56,221,208
333.byte 102,15,56,221,216
334.byte 102,15,56,221,224
335.byte 102,15,56,221,232
336.byte 102,15,56,221,240
337.byte 102,15,56,221,248
338 ret
339
340
341
// _aesni_decrypt6: file-local helper (no .globl) that decrypts six blocks
// in parallel. Uses a private register convention, not the C ABI.
//   In:  %rcx -> key schedule, %eax = round count,
//        %xmm2-%xmm7 = input blocks
//   Out: %xmm2-%xmm7 = decrypted blocks
//   Clobbers: %rax, %rcx, %xmm0, %xmm1, flags.
// Mirrors _aesni_encrypt6 with aesdec/aesdeclast: the prologue starts round 1
// on xmm2..xmm4 and enters the loop mid-body so xmm5..xmm7 catch up.
342.p2align 4
343_aesni_decrypt6:
344
// xmm0 = round key 0, xmm1 = round key 1; eax becomes rounds*16.
345 movups (%rcx),%xmm0
346 shll $4,%eax
347 movups 16(%rcx),%xmm1
348 xorps %xmm0,%xmm2
349 pxor %xmm0,%xmm3
350 pxor %xmm0,%xmm4
// aesdec %xmm1,%xmm2
351.byte 102,15,56,222,209
// rcx = key + 32 + rounds*16; rax = -rounds*16.
352 leaq 32(%rcx,%rax,1),%rcx
353 negq %rax
// aesdec %xmm1,%xmm3
354.byte 102,15,56,222,217
355 pxor %xmm0,%xmm5
356 pxor %xmm0,%xmm6
// aesdec %xmm1,%xmm4
357.byte 102,15,56,222,225
358 pxor %xmm0,%xmm7
// With rax = -rounds*16 this address is key + 32, i.e. round key 2.
359 movups (%rcx,%rax,1),%xmm0
360 addq $16,%rax
361 jmp L$dec_loop6_enter
362.p2align 4
363L$dec_loop6:
// aesdec %xmm1 into xmm2..xmm4
364.byte 102,15,56,222,209
365.byte 102,15,56,222,217
366.byte 102,15,56,222,225
367L$dec_loop6_enter:
// aesdec %xmm1 into xmm5..xmm7
368.byte 102,15,56,222,233
369.byte 102,15,56,222,241
370.byte 102,15,56,222,249
371 movups (%rcx,%rax,1),%xmm1
// Flags from this addq drive the jnz below.
372 addq $32,%rax
// aesdec %xmm0 into xmm2..xmm7
373.byte 102,15,56,222,208
374.byte 102,15,56,222,216
375.byte 102,15,56,222,224
376.byte 102,15,56,222,232
377.byte 102,15,56,222,240
378.byte 102,15,56,222,248
379 movups -16(%rcx,%rax,1),%xmm0
380 jnz L$dec_loop6
381
// Last aesdec (xmm1) then aesdeclast (xmm0) on all six blocks.
382.byte 102,15,56,222,209
383.byte 102,15,56,222,217
384.byte 102,15,56,222,225
385.byte 102,15,56,222,233
386.byte 102,15,56,222,241
387.byte 102,15,56,222,249
388.byte 102,15,56,223,208
389.byte 102,15,56,223,216
390.byte 102,15,56,223,224
391.byte 102,15,56,223,232
392.byte 102,15,56,223,240
393.byte 102,15,56,223,248
394 ret
395
396
397
// _aesni_encrypt8: file-local helper (no .globl) that encrypts eight blocks
// in parallel. Uses a private register convention, not the C ABI.
//   In:  %rcx -> key schedule, %eax = round count,
//        %xmm2-%xmm9 = input blocks
//   Out: %xmm2-%xmm9 = encrypted blocks
//   Clobbers: %rax, %rcx, %xmm0, %xmm1, flags.
// L$enc_loop8_enter is also used as a direct call target from the CTR tail
// in _aes_hw_ctr32_encrypt_blocks, which performs the whitening and first
// round itself. Keep the register state expected at that label
// (xmm0 = next key, rcx/rax = key end pointer and negative index) in sync
// with that caller.
398.p2align 4
399_aesni_encrypt8:
400
// xmm0 = round key 0, xmm1 = round key 1; eax becomes rounds*16.
401 movups (%rcx),%xmm0
402 shll $4,%eax
403 movups 16(%rcx),%xmm1
404 xorps %xmm0,%xmm2
405 xorps %xmm0,%xmm3
406 pxor %xmm0,%xmm4
407 pxor %xmm0,%xmm5
408 pxor %xmm0,%xmm6
// rcx = key + 32 + rounds*16; rax = -rounds*16.
409 leaq 32(%rcx,%rax,1),%rcx
410 negq %rax
// aesenc %xmm1,%xmm2 (round 1 begins while whitening of xmm7..xmm9 continues)
411.byte 102,15,56,220,209
412 pxor %xmm0,%xmm7
413 pxor %xmm0,%xmm8
// aesenc %xmm1,%xmm3
414.byte 102,15,56,220,217
415 pxor %xmm0,%xmm9
// With rax = -rounds*16 this address is key + 32, i.e. round key 2.
416 movups (%rcx,%rax,1),%xmm0
417 addq $16,%rax
418 jmp L$enc_loop8_inner
419.p2align 4
420L$enc_loop8:
// aesenc %xmm1 into xmm2, xmm3
421.byte 102,15,56,220,209
422.byte 102,15,56,220,217
423L$enc_loop8_inner:
// aesenc %xmm1 into xmm4..xmm9 (the 102,68,... forms carry a REX prefix
// selecting xmm8/xmm9 as destination)
424.byte 102,15,56,220,225
425.byte 102,15,56,220,233
426.byte 102,15,56,220,241
427.byte 102,15,56,220,249
428.byte 102,68,15,56,220,193
429.byte 102,68,15,56,220,201
430L$enc_loop8_enter:
431 movups (%rcx,%rax,1),%xmm1
// Flags from this addq drive the jnz below.
432 addq $32,%rax
// aesenc %xmm0 into xmm2..xmm9
433.byte 102,15,56,220,208
434.byte 102,15,56,220,216
435.byte 102,15,56,220,224
436.byte 102,15,56,220,232
437.byte 102,15,56,220,240
438.byte 102,15,56,220,248
439.byte 102,68,15,56,220,192
440.byte 102,68,15,56,220,200
441 movups -16(%rcx,%rax,1),%xmm0
442 jnz L$enc_loop8
443
// Last aesenc (xmm1) then aesenclast (xmm0) on all eight blocks.
444.byte 102,15,56,220,209
445.byte 102,15,56,220,217
446.byte 102,15,56,220,225
447.byte 102,15,56,220,233
448.byte 102,15,56,220,241
449.byte 102,15,56,220,249
450.byte 102,68,15,56,220,193
451.byte 102,68,15,56,220,201
452.byte 102,15,56,221,208
453.byte 102,15,56,221,216
454.byte 102,15,56,221,224
455.byte 102,15,56,221,232
456.byte 102,15,56,221,240
457.byte 102,15,56,221,248
458.byte 102,68,15,56,221,192
459.byte 102,68,15,56,221,200
460 ret
461
462
463
// _aesni_decrypt8: file-local helper (no .globl) that decrypts eight blocks
// in parallel. Uses a private register convention, not the C ABI.
//   In:  %rcx -> key schedule, %eax = round count,
//        %xmm2-%xmm9 = input blocks
//   Out: %xmm2-%xmm9 = decrypted blocks
//   Clobbers: %rax, %rcx, %xmm0, %xmm1, flags.
// Mirrors _aesni_encrypt8 with aesdec/aesdeclast.
464.p2align 4
465_aesni_decrypt8:
466
// xmm0 = round key 0, xmm1 = round key 1; eax becomes rounds*16.
467 movups (%rcx),%xmm0
468 shll $4,%eax
469 movups 16(%rcx),%xmm1
470 xorps %xmm0,%xmm2
471 xorps %xmm0,%xmm3
472 pxor %xmm0,%xmm4
473 pxor %xmm0,%xmm5
474 pxor %xmm0,%xmm6
// rcx = key + 32 + rounds*16; rax = -rounds*16.
475 leaq 32(%rcx,%rax,1),%rcx
476 negq %rax
// aesdec %xmm1,%xmm2
477.byte 102,15,56,222,209
478 pxor %xmm0,%xmm7
479 pxor %xmm0,%xmm8
// aesdec %xmm1,%xmm3
480.byte 102,15,56,222,217
481 pxor %xmm0,%xmm9
// With rax = -rounds*16 this address is key + 32, i.e. round key 2.
482 movups (%rcx,%rax,1),%xmm0
483 addq $16,%rax
484 jmp L$dec_loop8_inner
485.p2align 4
486L$dec_loop8:
// aesdec %xmm1 into xmm2, xmm3
487.byte 102,15,56,222,209
488.byte 102,15,56,222,217
489L$dec_loop8_inner:
// aesdec %xmm1 into xmm4..xmm9
490.byte 102,15,56,222,225
491.byte 102,15,56,222,233
492.byte 102,15,56,222,241
493.byte 102,15,56,222,249
494.byte 102,68,15,56,222,193
495.byte 102,68,15,56,222,201
496L$dec_loop8_enter:
497 movups (%rcx,%rax,1),%xmm1
// Flags from this addq drive the jnz below.
498 addq $32,%rax
// aesdec %xmm0 into xmm2..xmm9
499.byte 102,15,56,222,208
500.byte 102,15,56,222,216
501.byte 102,15,56,222,224
502.byte 102,15,56,222,232
503.byte 102,15,56,222,240
504.byte 102,15,56,222,248
505.byte 102,68,15,56,222,192
506.byte 102,68,15,56,222,200
507 movups -16(%rcx,%rax,1),%xmm0
508 jnz L$dec_loop8
509
// Last aesdec (xmm1) then aesdeclast (xmm0) on all eight blocks.
510.byte 102,15,56,222,209
511.byte 102,15,56,222,217
512.byte 102,15,56,222,225
513.byte 102,15,56,222,233
514.byte 102,15,56,222,241
515.byte 102,15,56,222,249
516.byte 102,68,15,56,222,193
517.byte 102,68,15,56,222,201
518.byte 102,15,56,223,208
519.byte 102,15,56,223,216
520.byte 102,15,56,223,224
521.byte 102,15,56,223,232
522.byte 102,15,56,223,240
523.byte 102,15,56,223,248
524.byte 102,68,15,56,223,192
525.byte 102,68,15,56,223,200
526 ret
527
528
// _aes_hw_ecb_encrypt: ECB-mode bulk encryption/decryption with AES-NI.
//   In:  %rdi -> input, %rsi -> output,
//        %rdx = length in bytes (rounded down to a multiple of 16 here;
//               a result of zero returns immediately),
//        %rcx -> key schedule (32-bit round count at offset 240),
//        %r8d = direction: non-zero encrypts, zero decrypts.
//   Internal: %r11 keeps the key pointer and %r10d the round count so both
//        can be restored after helper calls, which modify %rcx and %rax.
//   Clobbers: %rax, %rcx, %rdx, %rsi, %rdi, %r10, %r11, flags, %xmm0-%xmm9.
// Structure: full groups of 8 blocks (0x80 bytes) go through
// _aesni_{en,de}crypt8 in a software-pipelined loop (stores of the previous
// group overlap loads of the next); a 1..7 block remainder is dispatched to
// the matching helper. Decrypt paths zero each data register right after it
// is stored; encrypt paths do not.
529.globl _aes_hw_ecb_encrypt
530.private_extern _aes_hw_ecb_encrypt
531
532.p2align 4
533_aes_hw_ecb_encrypt:
534
535_CET_ENDBR
// Drop any partial trailing block; nothing to do if no whole block remains.
536 andq $-16,%rdx
537 jz L$ecb_ret
538
// eax/r10d = round count, r11 = key pointer (saved copies), xmm0 = round key 0.
539 movl 240(%rcx),%eax
540 movups (%rcx),%xmm0
541 movq %rcx,%r11
542 movl %eax,%r10d
543 testl %r8d,%r8d
544 jz L$ecb_decrypt
545
// ---- Encrypt ----
546 cmpq $0x80,%rdx
547 jb L$ecb_enc_tail
548
// Preload the first 8 blocks and enter the pipelined loop at its call site.
549 movdqu (%rdi),%xmm2
550 movdqu 16(%rdi),%xmm3
551 movdqu 32(%rdi),%xmm4
552 movdqu 48(%rdi),%xmm5
553 movdqu 64(%rdi),%xmm6
554 movdqu 80(%rdi),%xmm7
555 movdqu 96(%rdi),%xmm8
556 movdqu 112(%rdi),%xmm9
557 leaq 128(%rdi),%rdi
558 subq $0x80,%rdx
559 jmp L$ecb_enc_loop8_enter
560.p2align 4
561L$ecb_enc_loop8:
// Store the 8 results of the previous iteration while loading the next
// 8 inputs; also restore rcx/eax for the helper.
562 movups %xmm2,(%rsi)
563 movq %r11,%rcx
564 movdqu (%rdi),%xmm2
565 movl %r10d,%eax
566 movups %xmm3,16(%rsi)
567 movdqu 16(%rdi),%xmm3
568 movups %xmm4,32(%rsi)
569 movdqu 32(%rdi),%xmm4
570 movups %xmm5,48(%rsi)
571 movdqu 48(%rdi),%xmm5
572 movups %xmm6,64(%rsi)
573 movdqu 64(%rdi),%xmm6
574 movups %xmm7,80(%rsi)
575 movdqu 80(%rdi),%xmm7
576 movups %xmm8,96(%rsi)
577 movdqu 96(%rdi),%xmm8
578 movups %xmm9,112(%rsi)
579 leaq 128(%rsi),%rsi
580 movdqu 112(%rdi),%xmm9
581 leaq 128(%rdi),%rdi
582L$ecb_enc_loop8_enter:
583
584 call _aesni_encrypt8
585
// Loop while another full 8-block group remains (no borrow from the subtract).
586 subq $0x80,%rdx
587 jnc L$ecb_enc_loop8
588
// Flush the last full group, restore rcx/eax, and undo the final subtract
// to obtain the remaining byte count (0..0x70).
589 movups %xmm2,(%rsi)
590 movq %r11,%rcx
591 movups %xmm3,16(%rsi)
592 movl %r10d,%eax
593 movups %xmm4,32(%rsi)
594 movups %xmm5,48(%rsi)
595 movups %xmm6,64(%rsi)
596 movups %xmm7,80(%rsi)
597 movups %xmm8,96(%rsi)
598 movups %xmm9,112(%rsi)
599 leaq 128(%rsi),%rsi
600 addq $0x80,%rdx
601 jz L$ecb_ret
602
// Remainder dispatch for 1..7 blocks. Each cmpq is tested twice (jb, then
// je); the movups loads in between do not modify flags.
603L$ecb_enc_tail:
604 movups (%rdi),%xmm2
605 cmpq $0x20,%rdx
606 jb L$ecb_enc_one
607 movups 16(%rdi),%xmm3
608 je L$ecb_enc_two
609 movups 32(%rdi),%xmm4
610 cmpq $0x40,%rdx
611 jb L$ecb_enc_three
612 movups 48(%rdi),%xmm5
613 je L$ecb_enc_four
614 movups 64(%rdi),%xmm6
615 cmpq $0x60,%rdx
616 jb L$ecb_enc_five
617 movups 80(%rdi),%xmm7
618 je L$ecb_enc_six
// Seven blocks: run the 8-wide helper with a zeroed eighth lane and store
// only the first seven results.
619 movdqu 96(%rdi),%xmm8
620 xorps %xmm9,%xmm9
621 call _aesni_encrypt8
622 movups %xmm2,(%rsi)
623 movups %xmm3,16(%rsi)
624 movups %xmm4,32(%rsi)
625 movups %xmm5,48(%rsi)
626 movups %xmm6,64(%rsi)
627 movups %xmm7,80(%rsi)
628 movups %xmm8,96(%rsi)
629 jmp L$ecb_ret
630.p2align 4
// One block: inline single-block loop, same shape as _aes_hw_encrypt but
// with the key pointer in rcx.
631L$ecb_enc_one:
632 movups (%rcx),%xmm0
633 movups 16(%rcx),%xmm1
634 leaq 32(%rcx),%rcx
635 xorps %xmm0,%xmm2
636L$oop_enc1_3:
// aesenc %xmm1,%xmm2
637.byte 102,15,56,220,209
638 decl %eax
639 movups (%rcx),%xmm1
640 leaq 16(%rcx),%rcx
641 jnz L$oop_enc1_3
// aesenclast %xmm1,%xmm2
642.byte 102,15,56,221,209
643 movups %xmm2,(%rsi)
644 jmp L$ecb_ret
645.p2align 4
646L$ecb_enc_two:
647 call _aesni_encrypt2
648 movups %xmm2,(%rsi)
649 movups %xmm3,16(%rsi)
650 jmp L$ecb_ret
651.p2align 4
652L$ecb_enc_three:
653 call _aesni_encrypt3
654 movups %xmm2,(%rsi)
655 movups %xmm3,16(%rsi)
656 movups %xmm4,32(%rsi)
657 jmp L$ecb_ret
658.p2align 4
659L$ecb_enc_four:
660 call _aesni_encrypt4
661 movups %xmm2,(%rsi)
662 movups %xmm3,16(%rsi)
663 movups %xmm4,32(%rsi)
664 movups %xmm5,48(%rsi)
665 jmp L$ecb_ret
666.p2align 4
// Five blocks: use the 6-wide helper with a zeroed sixth lane.
667L$ecb_enc_five:
668 xorps %xmm7,%xmm7
669 call _aesni_encrypt6
670 movups %xmm2,(%rsi)
671 movups %xmm3,16(%rsi)
672 movups %xmm4,32(%rsi)
673 movups %xmm5,48(%rsi)
674 movups %xmm6,64(%rsi)
675 jmp L$ecb_ret
676.p2align 4
677L$ecb_enc_six:
678 call _aesni_encrypt6
679 movups %xmm2,(%rsi)
680 movups %xmm3,16(%rsi)
681 movups %xmm4,32(%rsi)
682 movups %xmm5,48(%rsi)
683 movups %xmm6,64(%rsi)
684 movups %xmm7,80(%rsi)
685 jmp L$ecb_ret
686
687.p2align 4
// ---- Decrypt ---- (same structure as the encrypt side)
688L$ecb_decrypt:
689 cmpq $0x80,%rdx
690 jb L$ecb_dec_tail
691
// Preload the first 8 blocks and enter the pipelined loop at its call site.
692 movdqu (%rdi),%xmm2
693 movdqu 16(%rdi),%xmm3
694 movdqu 32(%rdi),%xmm4
695 movdqu 48(%rdi),%xmm5
696 movdqu 64(%rdi),%xmm6
697 movdqu 80(%rdi),%xmm7
698 movdqu 96(%rdi),%xmm8
699 movdqu 112(%rdi),%xmm9
700 leaq 128(%rdi),%rdi
701 subq $0x80,%rdx
702 jmp L$ecb_dec_loop8_enter
703.p2align 4
704L$ecb_dec_loop8:
// Store the previous 8 results while loading the next 8 inputs; restore
// rcx/eax for the helper.
705 movups %xmm2,(%rsi)
706 movq %r11,%rcx
707 movdqu (%rdi),%xmm2
708 movl %r10d,%eax
709 movups %xmm3,16(%rsi)
710 movdqu 16(%rdi),%xmm3
711 movups %xmm4,32(%rsi)
712 movdqu 32(%rdi),%xmm4
713 movups %xmm5,48(%rsi)
714 movdqu 48(%rdi),%xmm5
715 movups %xmm6,64(%rsi)
716 movdqu 64(%rdi),%xmm6
717 movups %xmm7,80(%rsi)
718 movdqu 80(%rdi),%xmm7
719 movups %xmm8,96(%rsi)
720 movdqu 96(%rdi),%xmm8
721 movups %xmm9,112(%rsi)
722 leaq 128(%rsi),%rsi
723 movdqu 112(%rdi),%xmm9
724 leaq 128(%rdi),%rdi
725L$ecb_dec_loop8_enter:
726
727 call _aesni_decrypt8
728
// Reload round key 0 from the saved key pointer; movups leaves flags alone,
// so the subq/jnc pair below is unaffected.
729 movups (%r11),%xmm0
730 subq $0x80,%rdx
731 jnc L$ecb_dec_loop8
732
// Flush the last full group, zeroing each plaintext register once stored,
// then recover the remaining byte count.
733 movups %xmm2,(%rsi)
734 pxor %xmm2,%xmm2
735 movq %r11,%rcx
736 movups %xmm3,16(%rsi)
737 pxor %xmm3,%xmm3
738 movl %r10d,%eax
739 movups %xmm4,32(%rsi)
740 pxor %xmm4,%xmm4
741 movups %xmm5,48(%rsi)
742 pxor %xmm5,%xmm5
743 movups %xmm6,64(%rsi)
744 pxor %xmm6,%xmm6
745 movups %xmm7,80(%rsi)
746 pxor %xmm7,%xmm7
747 movups %xmm8,96(%rsi)
748 pxor %xmm8,%xmm8
749 movups %xmm9,112(%rsi)
750 pxor %xmm9,%xmm9
751 leaq 128(%rsi),%rsi
752 addq $0x80,%rdx
753 jz L$ecb_ret
754
// Remainder dispatch for 1..7 blocks; each cmpq feeds both the jb and the
// following je.
755L$ecb_dec_tail:
756 movups (%rdi),%xmm2
757 cmpq $0x20,%rdx
758 jb L$ecb_dec_one
759 movups 16(%rdi),%xmm3
760 je L$ecb_dec_two
761 movups 32(%rdi),%xmm4
762 cmpq $0x40,%rdx
763 jb L$ecb_dec_three
764 movups 48(%rdi),%xmm5
765 je L$ecb_dec_four
766 movups 64(%rdi),%xmm6
767 cmpq $0x60,%rdx
768 jb L$ecb_dec_five
769 movups 80(%rdi),%xmm7
770 je L$ecb_dec_six
// Seven blocks: 8-wide helper with a zeroed eighth lane.
771 movups 96(%rdi),%xmm8
772 movups (%rcx),%xmm0
773 xorps %xmm9,%xmm9
774 call _aesni_decrypt8
775 movups %xmm2,(%rsi)
776 pxor %xmm2,%xmm2
777 movups %xmm3,16(%rsi)
778 pxor %xmm3,%xmm3
779 movups %xmm4,32(%rsi)
780 pxor %xmm4,%xmm4
781 movups %xmm5,48(%rsi)
782 pxor %xmm5,%xmm5
783 movups %xmm6,64(%rsi)
784 pxor %xmm6,%xmm6
785 movups %xmm7,80(%rsi)
786 pxor %xmm7,%xmm7
787 movups %xmm8,96(%rsi)
788 pxor %xmm8,%xmm8
789 pxor %xmm9,%xmm9
790 jmp L$ecb_ret
791.p2align 4
// One block: inline single-block loop, same shape as _aes_hw_decrypt but
// with the key pointer in rcx.
792L$ecb_dec_one:
793 movups (%rcx),%xmm0
794 movups 16(%rcx),%xmm1
795 leaq 32(%rcx),%rcx
796 xorps %xmm0,%xmm2
797L$oop_dec1_4:
// aesdec %xmm1,%xmm2
798.byte 102,15,56,222,209
799 decl %eax
800 movups (%rcx),%xmm1
801 leaq 16(%rcx),%rcx
802 jnz L$oop_dec1_4
// aesdeclast %xmm1,%xmm2
803.byte 102,15,56,223,209
804 movups %xmm2,(%rsi)
805 pxor %xmm2,%xmm2
806 jmp L$ecb_ret
807.p2align 4
808L$ecb_dec_two:
809 call _aesni_decrypt2
810 movups %xmm2,(%rsi)
811 pxor %xmm2,%xmm2
812 movups %xmm3,16(%rsi)
813 pxor %xmm3,%xmm3
814 jmp L$ecb_ret
815.p2align 4
816L$ecb_dec_three:
817 call _aesni_decrypt3
818 movups %xmm2,(%rsi)
819 pxor %xmm2,%xmm2
820 movups %xmm3,16(%rsi)
821 pxor %xmm3,%xmm3
822 movups %xmm4,32(%rsi)
823 pxor %xmm4,%xmm4
824 jmp L$ecb_ret
825.p2align 4
826L$ecb_dec_four:
827 call _aesni_decrypt4
828 movups %xmm2,(%rsi)
829 pxor %xmm2,%xmm2
830 movups %xmm3,16(%rsi)
831 pxor %xmm3,%xmm3
832 movups %xmm4,32(%rsi)
833 pxor %xmm4,%xmm4
834 movups %xmm5,48(%rsi)
835 pxor %xmm5,%xmm5
836 jmp L$ecb_ret
837.p2align 4
// Five blocks: 6-wide helper with a zeroed sixth lane, which is scrubbed
// again afterwards.
838L$ecb_dec_five:
839 xorps %xmm7,%xmm7
840 call _aesni_decrypt6
841 movups %xmm2,(%rsi)
842 pxor %xmm2,%xmm2
843 movups %xmm3,16(%rsi)
844 pxor %xmm3,%xmm3
845 movups %xmm4,32(%rsi)
846 pxor %xmm4,%xmm4
847 movups %xmm5,48(%rsi)
848 pxor %xmm5,%xmm5
849 movups %xmm6,64(%rsi)
850 pxor %xmm6,%xmm6
851 pxor %xmm7,%xmm7
852 jmp L$ecb_ret
853.p2align 4
// Six blocks; this path falls through into L$ecb_ret.
854L$ecb_dec_six:
855 call _aesni_decrypt6
856 movups %xmm2,(%rsi)
857 pxor %xmm2,%xmm2
858 movups %xmm3,16(%rsi)
859 pxor %xmm3,%xmm3
860 movups %xmm4,32(%rsi)
861 pxor %xmm4,%xmm4
862 movups %xmm5,48(%rsi)
863 pxor %xmm5,%xmm5
864 movups %xmm6,64(%rsi)
865 pxor %xmm6,%xmm6
866 movups %xmm7,80(%rsi)
867 pxor %xmm7,%xmm7
868
// Common exit: scrub the two round-key registers.
869L$ecb_ret:
870 xorps %xmm0,%xmm0
871 pxor %xmm1,%xmm1
872 ret
873
874
// _aes_hw_ctr32_encrypt_blocks: CTR-mode keystream XOR with AES-NI, using a
// 32-bit counter held big-endian in the last 4 bytes of the counter block
// (it is byte-swapped with bswapl before being incremented).
//   In:  %rdi -> input, %rsi -> output,
//        %rdx = number of 16-byte blocks,
//        %rcx -> key schedule (32-bit round count at offset 240),
//        %r8  -> 16-byte initial counter block.
//   The counter block in memory is only read; nothing is stored through %r8.
//   Clobbers: %rax, %rcx, %rdx, %rsi, %rdi, %r8-%r11, flags, %xmm0-%xmm15
//        (all zeroed on the bulk path). %rbp is saved and restored.
// Paths:
//   * exactly 1 block: short inline path, no stack frame.
//   * otherwise (L$ctr32_bulk): a 128-byte, 16-byte-aligned stack area holds
//     eight counter blocks already XORed with round key 0; full groups of 8
//     run in L$ctr32_loop8, remainders of 1..3, 4 and 5..7 blocks have their
//     own tails.
// NOTE(review): a block count of 0 takes the bulk path and reaches
// L$ctr32_loop3, which still reads and writes one block before testing the
// count -- callers presumably never pass 0; confirm.
875.globl _aes_hw_ctr32_encrypt_blocks
876.private_extern _aes_hw_ctr32_encrypt_blocks
877
878.p2align 4
879_aes_hw_ctr32_encrypt_blocks:
880
881_CET_ENDBR
882#ifdef BORINGSSL_DISPATCH_TEST
// Dispatch-test builds only: flag that this entry point ran by setting
// byte 0 of _BORINGSSL_function_hit.
883 movb $1,_BORINGSSL_function_hit(%rip)
884#endif
885 cmpq $1,%rdx
886 jne L$ctr32_bulk
887
888
889
// ---- Single block ----
// xmm2 = counter block, xmm3 = input block; edx is reused as the round
// counter since the block count is no longer needed.
890 movups (%r8),%xmm2
891 movups (%rdi),%xmm3
892 movl 240(%rcx),%edx
893 movups (%rcx),%xmm0
894 movups 16(%rcx),%xmm1
895 leaq 32(%rcx),%rcx
896 xorps %xmm0,%xmm2
897L$oop_enc1_5:
// aesenc %xmm1,%xmm2
898.byte 102,15,56,220,209
899 decl %edx
900 movups (%rcx),%xmm1
901 leaq 16(%rcx),%rcx
902 jnz L$oop_enc1_5
// aesenclast %xmm1,%xmm2
903.byte 102,15,56,221,209
// Scrub keys, XOR keystream with the input, store, scrub the rest.
904 pxor %xmm0,%xmm0
905 pxor %xmm1,%xmm1
906 xorps %xmm3,%xmm2
907 pxor %xmm3,%xmm3
908 movups %xmm2,(%rsi)
909 xorps %xmm2,%xmm2
// Skip the frame teardown: this path never created a frame.
910 jmp L$ctr32_epilogue
911
912.p2align 4
// ---- Bulk path ----
913L$ctr32_bulk:
// r11 = caller's rsp (used to restore rsp and to find the saved rbp at
// -8(%r11)); then reserve 128 bytes and align rsp to 16 for movdqa/movaps.
914 leaq (%rsp),%r11
915
916 pushq %rbp
917
918 subq $128,%rsp
919 andq $-16,%rsp
920
921
922
923
// xmm2 = counter block ^ round key 0. r8d = counter as a native integer
// (dword at offset 12, byte-swapped). ebp = dword 3 of round key 0, XORed
// into every freshly computed counter word so the stored blocks stay
// "counter ^ key0".
924 movdqu (%r8),%xmm2
925 movdqu (%rcx),%xmm0
926 movl 12(%r8),%r8d
927 pxor %xmm0,%xmm2
928 movl 12(%rcx),%ebp
929 movdqa %xmm2,0(%rsp)
930 bswapl %r8d
931 movdqa %xmm2,%xmm3
932 movdqa %xmm2,%xmm4
933 movdqa %xmm2,%xmm5
934 movdqa %xmm2,64(%rsp)
935 movdqa %xmm2,80(%rsp)
936 movdqa %xmm2,96(%rsp)
// rdx (block count) is parked in r10 while rdx serves as scratch below.
937 movq %rdx,%r10
938 movdqa %xmm2,112(%rsp)
939
// Build counters +1..+7: bswap back to big-endian, XOR with ebp, and patch
// into dword 3 of each block (registers for +1..+3, stack slots for +4..+7).
940 leaq 1(%r8),%rax
941 leaq 2(%r8),%rdx
942 bswapl %eax
943 bswapl %edx
944 xorl %ebp,%eax
945 xorl %ebp,%edx
// pinsrd $3,%eax,%xmm3
946.byte 102,15,58,34,216,3
947 leaq 3(%r8),%rax
948 movdqa %xmm3,16(%rsp)
// pinsrd $3,%edx,%xmm4
949.byte 102,15,58,34,226,3
950 bswapl %eax
// Block count restored into rdx; r10 becomes scratch.
951 movq %r10,%rdx
952 leaq 4(%r8),%r10
953 movdqa %xmm4,32(%rsp)
954 xorl %ebp,%eax
955 bswapl %r10d
// pinsrd $3,%eax,%xmm5
956.byte 102,15,58,34,232,3
957 xorl %ebp,%r10d
958 movdqa %xmm5,48(%rsp)
959 leaq 5(%r8),%r9
960 movl %r10d,64+12(%rsp)
961 bswapl %r9d
962 leaq 6(%r8),%r10
// eax = round count.
963 movl 240(%rcx),%eax
964 xorl %ebp,%r9d
965 bswapl %r10d
966 movl %r9d,80+12(%rsp)
967 xorl %ebp,%r10d
968 leaq 7(%r8),%r9
969 movl %r10d,96+12(%rsp)
970 bswapl %r9d
971 xorl %ebp,%r9d
972 movl %r9d,112+12(%rsp)
973
// xmm1 = round key 1.
974 movups 16(%rcx),%xmm1
975
976 movdqa 64(%rsp),%xmm6
977 movdqa 80(%rsp),%xmm7
978
979 cmpq $8,%rdx
980 jb L$ctr32_tail
981
// Bias rcx by +128 so round keys are addressed as N-128(%rcx) in the loop.
982 leaq 128(%rcx),%rcx
983 subq $8,%rdx
984 jmp L$ctr32_loop8
985
986.p2align 5
// ---- 8 blocks per iteration ----
// xmm2..xmm9 hold the current eight (counter ^ key0) blocks. AES rounds
// (the .byte aesenc sequences, alternating keys in xmm1/xmm0) are
// interleaved with computing the next eight counter words into the stack
// slots. The 0x66,0x90 pairs are 2-byte nops.
987L$ctr32_loop8:
988 addl $8,%r8d
// xmm8/xmm9 are fetched before their stack slots are overwritten below.
989 movdqa 96(%rsp),%xmm8
990.byte 102,15,56,220,209
991 movl %r8d,%r9d
992 movdqa 112(%rsp),%xmm9
993.byte 102,15,56,220,217
994 bswapl %r9d
995 movups 32-128(%rcx),%xmm0
996.byte 102,15,56,220,225
997 xorl %ebp,%r9d
998 nop
999.byte 102,15,56,220,233
1000 movl %r9d,0+12(%rsp)
1001 leaq 1(%r8),%r9
1002.byte 102,15,56,220,241
1003.byte 102,15,56,220,249
1004.byte 102,68,15,56,220,193
1005.byte 102,68,15,56,220,201
1006 movups 48-128(%rcx),%xmm1
1007 bswapl %r9d
1008.byte 102,15,56,220,208
1009.byte 102,15,56,220,216
1010 xorl %ebp,%r9d
1011.byte 0x66,0x90
1012.byte 102,15,56,220,224
1013.byte 102,15,56,220,232
1014 movl %r9d,16+12(%rsp)
1015 leaq 2(%r8),%r9
1016.byte 102,15,56,220,240
1017.byte 102,15,56,220,248
1018.byte 102,68,15,56,220,192
1019.byte 102,68,15,56,220,200
1020 movups 64-128(%rcx),%xmm0
1021 bswapl %r9d
1022.byte 102,15,56,220,209
1023.byte 102,15,56,220,217
1024 xorl %ebp,%r9d
1025.byte 0x66,0x90
1026.byte 102,15,56,220,225
1027.byte 102,15,56,220,233
1028 movl %r9d,32+12(%rsp)
1029 leaq 3(%r8),%r9
1030.byte 102,15,56,220,241
1031.byte 102,15,56,220,249
1032.byte 102,68,15,56,220,193
1033.byte 102,68,15,56,220,201
1034 movups 80-128(%rcx),%xmm1
1035 bswapl %r9d
1036.byte 102,15,56,220,208
1037.byte 102,15,56,220,216
1038 xorl %ebp,%r9d
1039.byte 0x66,0x90
1040.byte 102,15,56,220,224
1041.byte 102,15,56,220,232
1042 movl %r9d,48+12(%rsp)
1043 leaq 4(%r8),%r9
1044.byte 102,15,56,220,240
1045.byte 102,15,56,220,248
1046.byte 102,68,15,56,220,192
1047.byte 102,68,15,56,220,200
1048 movups 96-128(%rcx),%xmm0
1049 bswapl %r9d
1050.byte 102,15,56,220,209
1051.byte 102,15,56,220,217
1052 xorl %ebp,%r9d
1053.byte 0x66,0x90
1054.byte 102,15,56,220,225
1055.byte 102,15,56,220,233
1056 movl %r9d,64+12(%rsp)
1057 leaq 5(%r8),%r9
1058.byte 102,15,56,220,241
1059.byte 102,15,56,220,249
1060.byte 102,68,15,56,220,193
1061.byte 102,68,15,56,220,201
1062 movups 112-128(%rcx),%xmm1
1063 bswapl %r9d
1064.byte 102,15,56,220,208
1065.byte 102,15,56,220,216
1066 xorl %ebp,%r9d
1067.byte 0x66,0x90
1068.byte 102,15,56,220,224
1069.byte 102,15,56,220,232
1070 movl %r9d,80+12(%rsp)
1071 leaq 6(%r8),%r9
1072.byte 102,15,56,220,240
1073.byte 102,15,56,220,248
1074.byte 102,68,15,56,220,192
1075.byte 102,68,15,56,220,200
1076 movups 128-128(%rcx),%xmm0
1077 bswapl %r9d
1078.byte 102,15,56,220,209
1079.byte 102,15,56,220,217
1080 xorl %ebp,%r9d
1081.byte 0x66,0x90
1082.byte 102,15,56,220,225
1083.byte 102,15,56,220,233
1084 movl %r9d,96+12(%rsp)
1085 leaq 7(%r8),%r9
1086.byte 102,15,56,220,241
1087.byte 102,15,56,220,249
1088.byte 102,68,15,56,220,193
1089.byte 102,68,15,56,220,201
1090 movups 144-128(%rcx),%xmm1
1091 bswapl %r9d
1092.byte 102,15,56,220,208
1093.byte 102,15,56,220,216
1094.byte 102,15,56,220,224
1095 xorl %ebp,%r9d
// First input block of this group is fetched early into xmm10.
1096 movdqu 0(%rdi),%xmm10
1097.byte 102,15,56,220,232
1098 movl %r9d,112+12(%rsp)
// Round-count dispatch: this cmpl feeds both the jb below and the je
// further down (the aesenc/movups instructions in between leave flags
// alone). Below 11 -> finish now; equal to 11 -> two extra rounds;
// above 11 -> four extra rounds. Presumably this maps to the three AES
// key sizes -- confirm against the key-setup code.
1099 cmpl $11,%eax
1100.byte 102,15,56,220,240
1101.byte 102,15,56,220,248
1102.byte 102,68,15,56,220,192
1103.byte 102,68,15,56,220,200
1104 movups 160-128(%rcx),%xmm0
1105
1106 jb L$ctr32_enc_done
1107
1108.byte 102,15,56,220,209
1109.byte 102,15,56,220,217
1110.byte 102,15,56,220,225
1111.byte 102,15,56,220,233
1112.byte 102,15,56,220,241
1113.byte 102,15,56,220,249
1114.byte 102,68,15,56,220,193
1115.byte 102,68,15,56,220,201
1116 movups 176-128(%rcx),%xmm1
1117
1118.byte 102,15,56,220,208
1119.byte 102,15,56,220,216
1120.byte 102,15,56,220,224
1121.byte 102,15,56,220,232
1122.byte 102,15,56,220,240
1123.byte 102,15,56,220,248
1124.byte 102,68,15,56,220,192
1125.byte 102,68,15,56,220,200
1126 movups 192-128(%rcx),%xmm0
1127 je L$ctr32_enc_done
1128
1129.byte 102,15,56,220,209
1130.byte 102,15,56,220,217
1131.byte 102,15,56,220,225
1132.byte 102,15,56,220,233
1133.byte 102,15,56,220,241
1134.byte 102,15,56,220,249
1135.byte 102,68,15,56,220,193
1136.byte 102,68,15,56,220,201
1137 movups 208-128(%rcx),%xmm1
1138
1139.byte 102,15,56,220,208
1140.byte 102,15,56,220,216
1141.byte 102,15,56,220,224
1142.byte 102,15,56,220,232
1143.byte 102,15,56,220,240
1144.byte 102,15,56,220,248
1145.byte 102,68,15,56,220,192
1146.byte 102,68,15,56,220,200
1147 movups 224-128(%rcx),%xmm0
1148 jmp L$ctr32_enc_done
1149
1150.p2align 4
// Here xmm1 = second-to-last round key, xmm0 = last round key.
// Each input block is XORed with the last round key and then used as the
// "key" operand of aesenclast, which folds the final AddRoundKey and the
// keystream XOR into a single instruction.
1151L$ctr32_enc_done:
1152 movdqu 16(%rdi),%xmm11
1153 pxor %xmm0,%xmm10
1154 movdqu 32(%rdi),%xmm12
1155 pxor %xmm0,%xmm11
1156 movdqu 48(%rdi),%xmm13
1157 pxor %xmm0,%xmm12
1158 movdqu 64(%rdi),%xmm14
1159 pxor %xmm0,%xmm13
1160 movdqu 80(%rdi),%xmm15
1161 pxor %xmm0,%xmm14
1162 prefetcht0 448(%rdi)
1163 prefetcht0 512(%rdi)
1164 pxor %xmm0,%xmm15
// Final aesenc with xmm1 on all eight blocks.
1165.byte 102,15,56,220,209
1166.byte 102,15,56,220,217
1167.byte 102,15,56,220,225
1168.byte 102,15,56,220,233
1169.byte 102,15,56,220,241
1170.byte 102,15,56,220,249
1171.byte 102,68,15,56,220,193
1172.byte 102,68,15,56,220,201
// xmm1 is free now: reuse it for input block 6; rdi advances to the next group.
1173 movdqu 96(%rdi),%xmm1
1174 leaq 128(%rdi),%rdi
1175
// aesenclast %xmm10,%xmm2
1176.byte 102,65,15,56,221,210
1177 pxor %xmm0,%xmm1
// Input block 7 (rdi was already advanced, hence the -128).
1178 movdqu 112-128(%rdi),%xmm10
// aesenclast %xmm11,%xmm3
1179.byte 102,65,15,56,221,219
1180 pxor %xmm0,%xmm10
// As each xmm11..xmm15/xmm0 is consumed, it is refilled with the next
// iteration's counter block from the stack.
1181 movdqa 0(%rsp),%xmm11
// aesenclast %xmm12,%xmm4 / aesenclast %xmm13,%xmm5
1182.byte 102,65,15,56,221,228
1183.byte 102,65,15,56,221,237
1184 movdqa 16(%rsp),%xmm12
1185 movdqa 32(%rsp),%xmm13
// aesenclast %xmm14,%xmm6 / aesenclast %xmm15,%xmm7
1186.byte 102,65,15,56,221,246
1187.byte 102,65,15,56,221,255
1188 movdqa 48(%rsp),%xmm14
1189 movdqa 64(%rsp),%xmm15
// aesenclast %xmm1,%xmm8
1190.byte 102,68,15,56,221,193
1191 movdqa 80(%rsp),%xmm0
// xmm1 = round key 1 again for the next iteration (or the tail).
1192 movups 16-128(%rcx),%xmm1
// aesenclast %xmm10,%xmm9
1193.byte 102,69,15,56,221,202
1194
// Store the eight output blocks while moving the next counters into xmm2..xmm7.
1195 movups %xmm2,(%rsi)
1196 movdqa %xmm11,%xmm2
1197 movups %xmm3,16(%rsi)
1198 movdqa %xmm12,%xmm3
1199 movups %xmm4,32(%rsi)
1200 movdqa %xmm13,%xmm4
1201 movups %xmm5,48(%rsi)
1202 movdqa %xmm14,%xmm5
1203 movups %xmm6,64(%rsi)
1204 movdqa %xmm15,%xmm6
1205 movups %xmm7,80(%rsi)
1206 movdqa %xmm0,%xmm7
1207 movups %xmm8,96(%rsi)
1208 movups %xmm9,112(%rsi)
1209 leaq 128(%rsi),%rsi
1210
1211 subq $8,%rdx
1212 jnc L$ctr32_loop8
1213
// Recover the remaining block count (0..7) and undo the +128 key bias.
1214 addq $8,%rdx
1215 jz L$ctr32_done
1216 leaq -128(%rcx),%rcx
1217
// ---- Remainder of 1..7 blocks ----
// On entry xmm2..xmm7 hold counter blocks 0..5 (^ key0), xmm1 = round key 1,
// eax = round count, rcx = key pointer.
1218L$ctr32_tail:
1219
1220
1221 leaq 16(%rcx),%rcx
1222 cmpq $4,%rdx
1223 jb L$ctr32_loop3
1224 je L$ctr32_loop4
1225
1226
// 5..7 blocks: set up rcx/rax the way _aesni_encrypt8 does, run round 1
// here on xmm2..xmm8, then call into the helper's loop body. xmm9 is a
// zeroed dummy lane whose result is discarded.
1227 shll $4,%eax
1228 movdqa 96(%rsp),%xmm8
1229 pxor %xmm9,%xmm9
1230
1231 movups 16(%rcx),%xmm0
1232.byte 102,15,56,220,209
1233.byte 102,15,56,220,217
1234 leaq 32-16(%rcx,%rax,1),%rcx
1235 negq %rax
1236.byte 102,15,56,220,225
1237 addq $16,%rax
1238 movups (%rdi),%xmm10
1239.byte 102,15,56,220,233
1240.byte 102,15,56,220,241
1241 movups 16(%rdi),%xmm11
1242 movups 32(%rdi),%xmm12
1243.byte 102,15,56,220,249
1244.byte 102,68,15,56,220,193
1245
// Enters _aesni_encrypt8 mid-function; its ret returns here.
1246 call L$enc_loop8_enter
1247
// XOR keystream with the input and store the first five blocks
// unconditionally, then the sixth and seventh according to the count.
1248 movdqu 48(%rdi),%xmm13
1249 pxor %xmm10,%xmm2
1250 movdqu 64(%rdi),%xmm10
1251 pxor %xmm11,%xmm3
1252 movdqu %xmm2,(%rsi)
1253 pxor %xmm12,%xmm4
1254 movdqu %xmm3,16(%rsi)
1255 pxor %xmm13,%xmm5
1256 movdqu %xmm4,32(%rsi)
1257 pxor %xmm10,%xmm6
1258 movdqu %xmm5,48(%rsi)
1259 movdqu %xmm6,64(%rsi)
// This cmpq feeds both the jb here and the je after the sixth block.
1260 cmpq $6,%rdx
1261 jb L$ctr32_done
1262
1263 movups 80(%rdi),%xmm11
1264 xorps %xmm11,%xmm7
1265 movups %xmm7,80(%rsi)
1266 je L$ctr32_done
1267
1268 movups 96(%rdi),%xmm12
1269 xorps %xmm12,%xmm8
1270 movups %xmm8,96(%rsi)
1271 jmp L$ctr32_done
1272
1273.p2align 5
// Exactly 4 blocks: simple round loop on xmm2..xmm5, eax counts rounds.
// The flags from decl survive the aesenc/movups instructions up to jnz.
1274L$ctr32_loop4:
1275.byte 102,15,56,220,209
1276 leaq 16(%rcx),%rcx
1277 decl %eax
1278.byte 102,15,56,220,217
1279.byte 102,15,56,220,225
1280.byte 102,15,56,220,233
1281 movups (%rcx),%xmm1
1282 jnz L$ctr32_loop4
// aesenclast %xmm1 into xmm2..xmm5, interleaved with loading the inputs.
1283.byte 102,15,56,221,209
1284.byte 102,15,56,221,217
1285 movups (%rdi),%xmm10
1286 movups 16(%rdi),%xmm11
1287.byte 102,15,56,221,225
1288.byte 102,15,56,221,233
1289 movups 32(%rdi),%xmm12
1290 movups 48(%rdi),%xmm13
1291
1292 xorps %xmm10,%xmm2
1293 movups %xmm2,(%rsi)
1294 xorps %xmm11,%xmm3
1295 movups %xmm3,16(%rsi)
1296 pxor %xmm12,%xmm4
1297 movdqu %xmm4,32(%rsi)
1298 pxor %xmm13,%xmm5
1299 movdqu %xmm5,48(%rsi)
1300 jmp L$ctr32_done
1301
1302.p2align 5
// Fewer than 4 blocks: always computes three keystream blocks (xmm2..xmm4),
// then stores the first unconditionally and the second/third per the count.
1303L$ctr32_loop3:
1304.byte 102,15,56,220,209
1305 leaq 16(%rcx),%rcx
1306 decl %eax
1307.byte 102,15,56,220,217
1308.byte 102,15,56,220,225
1309 movups (%rcx),%xmm1
1310 jnz L$ctr32_loop3
// aesenclast %xmm1 into xmm2..xmm4
1311.byte 102,15,56,221,209
1312.byte 102,15,56,221,217
1313.byte 102,15,56,221,225
1314
1315 movups (%rdi),%xmm10
1316 xorps %xmm10,%xmm2
1317 movups %xmm2,(%rsi)
// This cmpq feeds both the jb here and the je after the second block.
1318 cmpq $2,%rdx
1319 jb L$ctr32_done
1320
1321 movups 16(%rdi),%xmm11
1322 xorps %xmm11,%xmm3
1323 movups %xmm3,16(%rsi)
1324 je L$ctr32_done
1325
1326 movups 32(%rdi),%xmm12
1327 xorps %xmm12,%xmm4
1328 movups %xmm4,32(%rsi)
1329
// ---- Bulk-path exit ----
// Zero every xmm register and the 128-byte counter scratch area on the
// stack, then restore rbp (pushed just below the original rsp) and rsp.
1330L$ctr32_done:
1331 xorps %xmm0,%xmm0
1332 xorl %ebp,%ebp
1333 pxor %xmm1,%xmm1
1334 pxor %xmm2,%xmm2
1335 pxor %xmm3,%xmm3
1336 pxor %xmm4,%xmm4
1337 pxor %xmm5,%xmm5
1338 pxor %xmm6,%xmm6
1339 pxor %xmm7,%xmm7
1340 movaps %xmm0,0(%rsp)
1341 pxor %xmm8,%xmm8
1342 movaps %xmm0,16(%rsp)
1343 pxor %xmm9,%xmm9
1344 movaps %xmm0,32(%rsp)
1345 pxor %xmm10,%xmm10
1346 movaps %xmm0,48(%rsp)
1347 pxor %xmm11,%xmm11
1348 movaps %xmm0,64(%rsp)
1349 pxor %xmm12,%xmm12
1350 movaps %xmm0,80(%rsp)
1351 pxor %xmm13,%xmm13
1352 movaps %xmm0,96(%rsp)
1353 pxor %xmm14,%xmm14
1354 movaps %xmm0,112(%rsp)
1355 pxor %xmm15,%xmm15
1356 movq -8(%r11),%rbp
1357
1358 leaq (%r11),%rsp
1359
1360L$ctr32_epilogue:
1361 ret
1362
1363
// _aes_hw_cbc_encrypt: AES in CBC mode over a buffer, using AES-NI.
// Registers as they are consumed by the code below:
//   %rdi = input pointer, %rsi = output pointer, %rdx = length in bytes,
//   %rcx = key schedule (a 32-bit round counter is read from offset 240),
//   %r8  = 16-byte chaining value: loaded on entry, updated before return,
//   %r9d = direction: zero selects the decrypt paths, non-zero encrypts.
// NOTE(review): this matches the System V AMD64 argument order for a C
// prototype of the form (in, out, len, key, ivec, enc) - confirm against
// the C header.
// A zero length returns immediately without touching any buffer.
// AES-NI instructions are emitted as raw bytes (66 0F 38 xx /r):
//   ...,220,.. = aesenc      ...,221,.. = aesenclast
//   ...,222,.. = aesdec      ...,223,.. = aesdeclast
// The .long 0x9066A4F3 / 0x9066AAF3 words encode "rep movsb" / "rep stosb"
// followed by a two-byte nop.
1364.globl _aes_hw_cbc_encrypt
1365.private_extern _aes_hw_cbc_encrypt
1366
1367.p2align 4
1368_aes_hw_cbc_encrypt:
1369
1370_CET_ENDBR
1371 testq %rdx,%rdx
1372 jz L$cbc_ret
1373
// r10d keeps the round counter and r11 the key pointer so that eax/rcx,
// which the per-block loops consume, can be reloaded for every block.
1374 movl 240(%rcx),%r10d
1375 movq %rcx,%r11
1376 testl %r9d,%r9d
1377 jz L$cbc_decrypt
1378
// ---- Encrypt path: xmm2 carries the chaining value from block to block.
1379 movups (%r8),%xmm2
1380 movl %r10d,%eax
1381 cmpq $16,%rdx
1382 jb L$cbc_enc_tail
1383 subq $16,%rdx
1384 jmp L$cbc_enc_loop
1385.p2align 4
1386L$cbc_enc_loop:
1387 movups (%rdi),%xmm3
1388 leaq 16(%rdi),%rdi
1389
// xmm3 = input ^ round key 0; xoring it into xmm2 performs the CBC xor
// and the initial AddRoundKey together.
1390 movups (%rcx),%xmm0
1391 movups 16(%rcx),%xmm1
1392 xorps %xmm0,%xmm3
1393 leaq 32(%rcx),%rcx
1394 xorps %xmm3,%xmm2
// aesenc %xmm1,%xmm2 once per remaining count in eax, then aesenclast.
1395L$oop_enc1_6:
1396.byte 102,15,56,220,209
1397 decl %eax
1398 movups (%rcx),%xmm1
1399 leaq 16(%rcx),%rcx
1400 jnz L$oop_enc1_6
1401.byte 102,15,56,221,209
// Rewind the round counter and key pointer for the next block.
1402 movl %r10d,%eax
1403 movq %r11,%rcx
1404 movups %xmm2,0(%rsi)
1405 leaq 16(%rsi),%rsi
// rdx was pre-decremented by 16: a borrow here means less than one full
// block is left; adding 16 back yields the leftover byte count (0..15).
1406 subq $16,%rdx
1407 jnc L$cbc_enc_loop
1408 addq $16,%rdx
1409 jnz L$cbc_enc_tail
// No leftover bytes: write the last ciphertext block back through r8 and
// zero the xmm registers that were used (presumably to scrub key/data).
1410 pxor %xmm0,%xmm0
1411 pxor %xmm1,%xmm1
1412 movups %xmm2,(%r8)
1413 pxor %xmm2,%xmm2
1414 pxor %xmm3,%xmm3
1415 jmp L$cbc_ret
1416
// Partial final block (rdx < 16): copy the rdx leftover input bytes to the
// output buffer (rep movsb after swapping rdi/rsi), zero-fill up to 16
// bytes (rep stosb with al = 0), then point both rdi and rsi at that padded
// block and run the encrypt loop once more with rdx = 0.
1417L$cbc_enc_tail:
1418 movq %rdx,%rcx
1419 xchgq %rdi,%rsi
1420.long 0x9066A4F3
1421 movl $16,%ecx
1422 subq %rdx,%rcx
1423 xorl %eax,%eax
1424.long 0x9066AAF3
1425 leaq -16(%rdi),%rdi
1426 movl %r10d,%eax
1427 movq %rdi,%rsi
1428 movq %r11,%rcx
1429 xorq %rdx,%rdx
1430 jmp L$cbc_enc_loop
1431
// ---- Decrypt path. A length of exactly 16 bytes is handled inline here
// without setting up a stack frame; anything else goes to the bulk code.
1432.p2align 4
1433L$cbc_decrypt:
1434 cmpq $16,%rdx
1435 jne L$cbc_decrypt_bulk
1436
1437
1438
// xmm2 = ciphertext block, xmm3 = chaining value, xmm4 = copy of the
// ciphertext that becomes the new chaining value written through r8.
1439 movdqu (%rdi),%xmm2
1440 movdqu (%r8),%xmm3
1441 movdqa %xmm2,%xmm4
1442 movups (%rcx),%xmm0
1443 movups 16(%rcx),%xmm1
1444 leaq 32(%rcx),%rcx
1445 xorps %xmm0,%xmm2
1446L$oop_dec1_7:
1447.byte 102,15,56,222,209
1448 decl %r10d
1449 movups (%rcx),%xmm1
1450 leaq 16(%rcx),%rcx
1451 jnz L$oop_dec1_7
1452.byte 102,15,56,223,209
1453 pxor %xmm0,%xmm0
1454 pxor %xmm1,%xmm1
1455 movdqu %xmm4,(%r8)
1456 xorps %xmm3,%xmm2
1457 pxor %xmm3,%xmm3
1458 movups %xmm2,(%rsi)
1459 pxor %xmm2,%xmm2
1460 jmp L$cbc_ret
// ---- Bulk decrypt. r11 remembers the entry rsp; rbp is pushed and a
// 16-byte-aligned scratch slot is reserved on the stack (used only by
// L$cbc_dec_tail_partial). xmm10 holds the running chaining value.
1461.p2align 4
1462L$cbc_decrypt_bulk:
1463 leaq (%rsp),%r11
1464
1465 pushq %rbp
1466
1467 subq $16,%rsp
1468 andq $-16,%rsp
1469 movq %rcx,%rbp
1470 movups (%r8),%xmm10
1471 movl %r10d,%eax
1472 cmpq $0x50,%rdx
1473 jbe L$cbc_dec_tail
1474
// More than 0x50 bytes: preload six ciphertext blocks into xmm2..xmm7 and
// keep copies of the first five in xmm11..xmm15 for the later CBC xor.
1475 movups (%rcx),%xmm0
1476 movdqu 0(%rdi),%xmm2
1477 movdqu 16(%rdi),%xmm3
1478 movdqa %xmm2,%xmm11
1479 movdqu 32(%rdi),%xmm4
1480 movdqa %xmm3,%xmm12
1481 movdqu 48(%rdi),%xmm5
1482 movdqa %xmm4,%xmm13
1483 movdqu 64(%rdi),%xmm6
1484 movdqa %xmm5,%xmm14
1485 movdqu 80(%rdi),%xmm7
1486 movdqa %xmm6,%xmm15
1487 cmpq $0x70,%rdx
1488 jbe L$cbc_dec_six_or_seven
1489
// More than 0x70 bytes: enter the 8-block loop. rcx is biased by +112, so
// the round keys are addressed below as N-112(%rcx).
1490 subq $0x70,%rdx
1491 leaq 112(%rcx),%rcx
1492 jmp L$cbc_dec_loop8_enter
1493.p2align 4
1494L$cbc_dec_loop8:
// Store the eighth plaintext block of the previous iteration.
1495 movups %xmm9,(%rsi)
1496 leaq 16(%rsi),%rsi
1497L$cbc_dec_loop8_enter:
1498 movdqu 96(%rdi),%xmm8
1499 pxor %xmm0,%xmm2
1500 movdqu 112(%rdi),%xmm9
1501 pxor %xmm0,%xmm3
1502 movups 16-112(%rcx),%xmm1
1503 pxor %xmm0,%xmm4
// rbp is computed branch-free from the carry of "cmpq $0x70,%rdx":
// -1 + CF, masked with 128, plus rdi. It ends up as rdi when rdx < 0x70 and
// rdi+128 otherwise. The instructions between the cmpq and the adcq do not
// modify flags. rbp is the address from which the next iteration's inputs
// are preloaded in L$cbc_dec_done (presumably chosen so that those loads
// never run past the end of the input - confirm).
1504 movq $-1,%rbp
1505 cmpq $0x70,%rdx
1506 pxor %xmm0,%xmm5
1507 pxor %xmm0,%xmm6
1508 pxor %xmm0,%xmm7
1509 pxor %xmm0,%xmm8
1510
// From here on: alternating aesdec rounds on xmm2..xmm9, using xmm1 and
// xmm0 in turn as the current round key while the other one is reloaded.
1511.byte 102,15,56,222,209
1512 pxor %xmm0,%xmm9
1513 movups 32-112(%rcx),%xmm0
1514.byte 102,15,56,222,217
1515.byte 102,15,56,222,225
1516.byte 102,15,56,222,233
1517.byte 102,15,56,222,241
1518.byte 102,15,56,222,249
1519.byte 102,68,15,56,222,193
1520 adcq $0,%rbp
1521 andq $128,%rbp
1522.byte 102,68,15,56,222,201
1523 addq %rdi,%rbp
1524 movups 48-112(%rcx),%xmm1
1525.byte 102,15,56,222,208
1526.byte 102,15,56,222,216
1527.byte 102,15,56,222,224
1528.byte 102,15,56,222,232
1529.byte 102,15,56,222,240
1530.byte 102,15,56,222,248
1531.byte 102,68,15,56,222,192
1532.byte 102,68,15,56,222,200
1533 movups 64-112(%rcx),%xmm0
1534 nop
1535.byte 102,15,56,222,209
1536.byte 102,15,56,222,217
1537.byte 102,15,56,222,225
1538.byte 102,15,56,222,233
1539.byte 102,15,56,222,241
1540.byte 102,15,56,222,249
1541.byte 102,68,15,56,222,193
1542.byte 102,68,15,56,222,201
1543 movups 80-112(%rcx),%xmm1
1544 nop
1545.byte 102,15,56,222,208
1546.byte 102,15,56,222,216
1547.byte 102,15,56,222,224
1548.byte 102,15,56,222,232
1549.byte 102,15,56,222,240
1550.byte 102,15,56,222,248
1551.byte 102,68,15,56,222,192
1552.byte 102,68,15,56,222,200
1553 movups 96-112(%rcx),%xmm0
1554 nop
1555.byte 102,15,56,222,209
1556.byte 102,15,56,222,217
1557.byte 102,15,56,222,225
1558.byte 102,15,56,222,233
1559.byte 102,15,56,222,241
1560.byte 102,15,56,222,249
1561.byte 102,68,15,56,222,193
1562.byte 102,68,15,56,222,201
1563 movups 112-112(%rcx),%xmm1
1564 nop
1565.byte 102,15,56,222,208
1566.byte 102,15,56,222,216
1567.byte 102,15,56,222,224
1568.byte 102,15,56,222,232
1569.byte 102,15,56,222,240
1570.byte 102,15,56,222,248
1571.byte 102,68,15,56,222,192
1572.byte 102,68,15,56,222,200
1573 movups 128-112(%rcx),%xmm0
1574 nop
1575.byte 102,15,56,222,209
1576.byte 102,15,56,222,217
1577.byte 102,15,56,222,225
1578.byte 102,15,56,222,233
1579.byte 102,15,56,222,241
1580.byte 102,15,56,222,249
1581.byte 102,68,15,56,222,193
1582.byte 102,68,15,56,222,201
1583 movups 144-112(%rcx),%xmm1
// Key-size dispatch on the round counter in eax. The flags set here are
// consumed by the "jb" and, further down, the "je"; nothing in between
// modifies them. Below 11 skips both extra round pairs, equal to 11 skips
// the second pair only.
1584 cmpl $11,%eax
1585.byte 102,15,56,222,208
1586.byte 102,15,56,222,216
1587.byte 102,15,56,222,224
1588.byte 102,15,56,222,232
1589.byte 102,15,56,222,240
1590.byte 102,15,56,222,248
1591.byte 102,68,15,56,222,192
1592.byte 102,68,15,56,222,200
1593 movups 160-112(%rcx),%xmm0
1594 jb L$cbc_dec_done
1595.byte 102,15,56,222,209
1596.byte 102,15,56,222,217
1597.byte 102,15,56,222,225
1598.byte 102,15,56,222,233
1599.byte 102,15,56,222,241
1600.byte 102,15,56,222,249
1601.byte 102,68,15,56,222,193
1602.byte 102,68,15,56,222,201
1603 movups 176-112(%rcx),%xmm1
1604 nop
1605.byte 102,15,56,222,208
1606.byte 102,15,56,222,216
1607.byte 102,15,56,222,224
1608.byte 102,15,56,222,232
1609.byte 102,15,56,222,240
1610.byte 102,15,56,222,248
1611.byte 102,68,15,56,222,192
1612.byte 102,68,15,56,222,200
1613 movups 192-112(%rcx),%xmm0
1614 je L$cbc_dec_done
1615.byte 102,15,56,222,209
1616.byte 102,15,56,222,217
1617.byte 102,15,56,222,225
1618.byte 102,15,56,222,233
1619.byte 102,15,56,222,241
1620.byte 102,15,56,222,249
1621.byte 102,68,15,56,222,193
1622.byte 102,68,15,56,222,201
1623 movups 208-112(%rcx),%xmm1
1624 nop
1625.byte 102,15,56,222,208
1626.byte 102,15,56,222,216
1627.byte 102,15,56,222,224
1628.byte 102,15,56,222,232
1629.byte 102,15,56,222,240
1630.byte 102,15,56,222,248
1631.byte 102,68,15,56,222,192
1632.byte 102,68,15,56,222,200
1633 movups 224-112(%rcx),%xmm0
1634 jmp L$cbc_dec_done
1635.p2align 4
// Final rounds. xmm0 holds the last round key here. It is xored into the
// chaining value (xmm10) and the saved ciphertext copies (xmm11..xmm15, and
// blocks 5/6 reloaded from 80/96(%rdi)), and those sums are then used as
// the "round key" operand of aesdeclast, so the last AddRoundKey and the
// CBC xor happen in a single instruction per block.
// Interleaved with that, the next iteration's inputs are preloaded from
// rbp, and xmm10 becomes the eighth ciphertext block (from 112(%rdi)),
// i.e. the chaining value for the following group.
1636L$cbc_dec_done:
1637.byte 102,15,56,222,209
1638.byte 102,15,56,222,217
1639 pxor %xmm0,%xmm10
1640 pxor %xmm0,%xmm11
1641.byte 102,15,56,222,225
1642.byte 102,15,56,222,233
1643 pxor %xmm0,%xmm12
1644 pxor %xmm0,%xmm13
1645.byte 102,15,56,222,241
1646.byte 102,15,56,222,249
1647 pxor %xmm0,%xmm14
1648 pxor %xmm0,%xmm15
1649.byte 102,68,15,56,222,193
1650.byte 102,68,15,56,222,201
1651 movdqu 80(%rdi),%xmm1
1652
1653.byte 102,65,15,56,223,210
1654 movdqu 96(%rdi),%xmm10
1655 pxor %xmm0,%xmm1
1656.byte 102,65,15,56,223,219
1657 pxor %xmm0,%xmm10
1658 movdqu 112(%rdi),%xmm0
1659.byte 102,65,15,56,223,228
1660 leaq 128(%rdi),%rdi
1661 movdqu 0(%rbp),%xmm11
1662.byte 102,65,15,56,223,237
1663.byte 102,65,15,56,223,246
1664 movdqu 16(%rbp),%xmm12
1665 movdqu 32(%rbp),%xmm13
1666.byte 102,65,15,56,223,255
1667.byte 102,68,15,56,223,193
1668 movdqu 48(%rbp),%xmm14
1669 movdqu 64(%rbp),%xmm15
1670.byte 102,69,15,56,223,202
1671 movdqa %xmm0,%xmm10
1672 movdqu 80(%rbp),%xmm1
// Reload round key 0 (rcx is still biased by +112).
1673 movups -112(%rcx),%xmm0
1674
// Store seven plaintext blocks; the eighth (xmm9) is stored at the top of
// the next iteration or after the loop. At the same time move the preloaded
// ciphertext into xmm2..xmm7 as the next working set.
1675 movups %xmm2,(%rsi)
1676 movdqa %xmm11,%xmm2
1677 movups %xmm3,16(%rsi)
1678 movdqa %xmm12,%xmm3
1679 movups %xmm4,32(%rsi)
1680 movdqa %xmm13,%xmm4
1681 movups %xmm5,48(%rsi)
1682 movdqa %xmm14,%xmm5
1683 movups %xmm6,64(%rsi)
1684 movdqa %xmm15,%xmm6
1685 movups %xmm7,80(%rsi)
1686 movdqa %xmm1,%xmm7
1687 movups %xmm8,96(%rsi)
1688 leaq 112(%rsi),%rsi
1689
1690 subq $0x80,%rdx
1691 ja L$cbc_dec_loop8
1692
// Loop exit: undo the +112 bias on rcx and the 0x70 bias on rdx. If
// nothing is left, xmm2 (= xmm9) is the final block and is written by
// L$cbc_dec_tail_collected; otherwise store xmm9 and pick a tail path.
1693 movaps %xmm9,%xmm2
1694 leaq -112(%rcx),%rcx
1695 addq $0x70,%rdx
1696 jle L$cbc_dec_clear_tail_collected
1697 movups %xmm9,(%rsi)
1698 leaq 16(%rsi),%rsi
1699 cmpq $0x50,%rdx
1700 jbe L$cbc_dec_tail
1701
1702 movaps %xmm11,%xmm2
// Six blocks (rdx <= 0x60) or seven. xmm2..xmm7 are already loaded and
// xmm11..xmm15 hold the ciphertext copies.
1703L$cbc_dec_six_or_seven:
1704 cmpq $0x60,%rdx
1705 ja L$cbc_dec_seven
1706
// xmm8 preserves the sixth ciphertext block, which becomes the new
// chaining value in xmm10 after the call.
1707 movaps %xmm7,%xmm8
1708 call _aesni_decrypt6
1709 pxor %xmm10,%xmm2
1710 movaps %xmm8,%xmm10
1711 pxor %xmm11,%xmm3
1712 movdqu %xmm2,(%rsi)
1713 pxor %xmm12,%xmm4
1714 movdqu %xmm3,16(%rsi)
1715 pxor %xmm3,%xmm3
1716 pxor %xmm13,%xmm5
1717 movdqu %xmm4,32(%rsi)
1718 pxor %xmm4,%xmm4
1719 pxor %xmm14,%xmm6
1720 movdqu %xmm5,48(%rsi)
1721 pxor %xmm5,%xmm5
1722 pxor %xmm15,%xmm7
1723 movdqu %xmm6,64(%rsi)
1724 pxor %xmm6,%xmm6
1725 leaq 80(%rsi),%rsi
1726 movdqa %xmm7,%xmm2
1727 pxor %xmm7,%xmm7
1728 jmp L$cbc_dec_tail_collected
1729
// Seven blocks: the seventh is loaded into xmm8, xmm9 is zeroed as a dummy
// eighth lane for _aesni_decrypt8. Afterwards 80(%rdi) and 96(%rdi) are
// re-read: the former is the CBC xor operand for block seven, the latter
// the new chaining value.
1730.p2align 4
1731L$cbc_dec_seven:
1732 movups 96(%rdi),%xmm8
1733 xorps %xmm9,%xmm9
1734 call _aesni_decrypt8
1735 movups 80(%rdi),%xmm9
1736 pxor %xmm10,%xmm2
1737 movups 96(%rdi),%xmm10
1738 pxor %xmm11,%xmm3
1739 movdqu %xmm2,(%rsi)
1740 pxor %xmm12,%xmm4
1741 movdqu %xmm3,16(%rsi)
1742 pxor %xmm3,%xmm3
1743 pxor %xmm13,%xmm5
1744 movdqu %xmm4,32(%rsi)
1745 pxor %xmm4,%xmm4
1746 pxor %xmm14,%xmm6
1747 movdqu %xmm5,48(%rsi)
1748 pxor %xmm5,%xmm5
1749 pxor %xmm15,%xmm7
1750 movdqu %xmm6,64(%rsi)
1751 pxor %xmm6,%xmm6
1752 pxor %xmm9,%xmm8
1753 movdqu %xmm7,80(%rsi)
1754 pxor %xmm7,%xmm7
1755 leaq 96(%rsi),%rsi
1756 movdqa %xmm8,%xmm2
1757 pxor %xmm8,%xmm8
1758 pxor %xmm9,%xmm9
1759 jmp L$cbc_dec_tail_collected
1760
// Tail of at most 0x50 bytes: load blocks one at a time, subtracting 16
// from rdx after each, and branch to the 1/2/3/4-block handler as soon as
// the count reaches zero or borrows. Falling through means five blocks.
1761L$cbc_dec_tail:
1762 movups (%rdi),%xmm2
1763 subq $0x10,%rdx
1764 jbe L$cbc_dec_one
1765
1766 movups 16(%rdi),%xmm3
1767 movaps %xmm2,%xmm11
1768 subq $0x10,%rdx
1769 jbe L$cbc_dec_two
1770
1771 movups 32(%rdi),%xmm4
1772 movaps %xmm3,%xmm12
1773 subq $0x10,%rdx
1774 jbe L$cbc_dec_three
1775
1776 movups 48(%rdi),%xmm5
1777 movaps %xmm4,%xmm13
1778 subq $0x10,%rdx
1779 jbe L$cbc_dec_four
1780
// Five blocks: xmm7 is zeroed as a dummy sixth lane for _aesni_decrypt6.
1781 movups 64(%rdi),%xmm6
1782 movaps %xmm5,%xmm14
1783 movaps %xmm6,%xmm15
1784 xorps %xmm7,%xmm7
1785 call _aesni_decrypt6
1786 pxor %xmm10,%xmm2
1787 movaps %xmm15,%xmm10
1788 pxor %xmm11,%xmm3
1789 movdqu %xmm2,(%rsi)
1790 pxor %xmm12,%xmm4
1791 movdqu %xmm3,16(%rsi)
1792 pxor %xmm3,%xmm3
1793 pxor %xmm13,%xmm5
1794 movdqu %xmm4,32(%rsi)
1795 pxor %xmm4,%xmm4
1796 pxor %xmm14,%xmm6
1797 movdqu %xmm5,48(%rsi)
1798 pxor %xmm5,%xmm5
1799 leaq 64(%rsi),%rsi
1800 movdqa %xmm6,%xmm2
1801 pxor %xmm6,%xmm6
1802 pxor %xmm7,%xmm7
1803 subq $0x10,%rdx
1804 jmp L$cbc_dec_tail_collected
1805
// One block: decrypted inline with the single-block round loop.
1806.p2align 4
1807L$cbc_dec_one:
1808 movaps %xmm2,%xmm11
1809 movups (%rcx),%xmm0
1810 movups 16(%rcx),%xmm1
1811 leaq 32(%rcx),%rcx
1812 xorps %xmm0,%xmm2
1813L$oop_dec1_8:
1814.byte 102,15,56,222,209
1815 decl %eax
1816 movups (%rcx),%xmm1
1817 leaq 16(%rcx),%rcx
1818 jnz L$oop_dec1_8
1819.byte 102,15,56,223,209
1820 xorps %xmm10,%xmm2
1821 movaps %xmm11,%xmm10
1822 jmp L$cbc_dec_tail_collected
// Two, three and four blocks: same pattern with the matching helper. The
// last ciphertext block is saved first and becomes xmm10 afterwards; all
// plaintext blocks but the last are stored here, the last is left in xmm2.
1823.p2align 4
1824L$cbc_dec_two:
1825 movaps %xmm3,%xmm12
1826 call _aesni_decrypt2
1827 pxor %xmm10,%xmm2
1828 movaps %xmm12,%xmm10
1829 pxor %xmm11,%xmm3
1830 movdqu %xmm2,(%rsi)
1831 movdqa %xmm3,%xmm2
1832 pxor %xmm3,%xmm3
1833 leaq 16(%rsi),%rsi
1834 jmp L$cbc_dec_tail_collected
1835.p2align 4
1836L$cbc_dec_three:
1837 movaps %xmm4,%xmm13
1838 call _aesni_decrypt3
1839 pxor %xmm10,%xmm2
1840 movaps %xmm13,%xmm10
1841 pxor %xmm11,%xmm3
1842 movdqu %xmm2,(%rsi)
1843 pxor %xmm12,%xmm4
1844 movdqu %xmm3,16(%rsi)
1845 pxor %xmm3,%xmm3
1846 movdqa %xmm4,%xmm2
1847 pxor %xmm4,%xmm4
1848 leaq 32(%rsi),%rsi
1849 jmp L$cbc_dec_tail_collected
1850.p2align 4
1851L$cbc_dec_four:
1852 movaps %xmm5,%xmm14
1853 call _aesni_decrypt4
1854 pxor %xmm10,%xmm2
1855 movaps %xmm14,%xmm10
1856 pxor %xmm11,%xmm3
1857 movdqu %xmm2,(%rsi)
1858 pxor %xmm12,%xmm4
1859 movdqu %xmm3,16(%rsi)
1860 pxor %xmm3,%xmm3
1861 pxor %xmm13,%xmm5
1862 movdqu %xmm4,32(%rsi)
1863 pxor %xmm4,%xmm4
1864 movdqa %xmm5,%xmm2
1865 pxor %xmm5,%xmm5
1866 leaq 48(%rsi),%rsi
1867 jmp L$cbc_dec_tail_collected
1868
// Reached only from the 8-block loop exit: zero xmm3..xmm9 before joining
// the common tail (the other paths zero their registers themselves).
1869.p2align 4
1870L$cbc_dec_clear_tail_collected:
1871 pxor %xmm3,%xmm3
1872 pxor %xmm4,%xmm4
1873 pxor %xmm5,%xmm5
1874 pxor %xmm6,%xmm6
1875 pxor %xmm7,%xmm7
1876 pxor %xmm8,%xmm8
1877 pxor %xmm9,%xmm9
// Common tail: xmm10 = new chaining value (written through r8), xmm2 = the
// last plaintext block, not stored yet. If the low four bits of rdx are
// zero the whole block is stored; otherwise take the partial path.
1878L$cbc_dec_tail_collected:
1879 movups %xmm10,(%r8)
1880 andq $15,%rdx
1881 jnz L$cbc_dec_tail_partial
1882 movups %xmm2,(%rsi)
1883 pxor %xmm2,%xmm2
1884 jmp L$cbc_dec_ret
// Partial final block: spill xmm2 to the aligned stack slot, copy
// 16 - (rdx & 15) bytes from there to the output with rep movsb, then
// overwrite the slot with the already-zeroed xmm2.
1885.p2align 4
1886L$cbc_dec_tail_partial:
1887 movaps %xmm2,(%rsp)
1888 pxor %xmm2,%xmm2
1889 movq $16,%rcx
1890 movq %rsi,%rdi
1891 subq %rdx,%rcx
1892 leaq (%rsp),%rsi
1893.long 0x9066A4F3
1894 movdqa %xmm2,(%rsp)
1895
// Bulk-decrypt epilogue: restore rbp from just below the entry rsp (where
// the pushq put it) and restore rsp from r11.
1896L$cbc_dec_ret:
1897 xorps %xmm0,%xmm0
1898 pxor %xmm1,%xmm1
1899 movq -8(%r11),%rbp
1900
1901 leaq (%r11),%rsp
1902
1903L$cbc_ret:
1904 ret
1905
1906
// _aes_hw_encrypt_key_to_decrypt_key: convert, in place, the key schedule
// at %rdi into the form consumed by the aesdec-based routines.
//   %rdi = key schedule; the round counter is read from offset 240.
// The 16-byte round keys are reversed end for end. The first and last keys
// are swapped unchanged; every other key is passed through aesimc
// (.byte 102,15,56,219,/r) while being swapped. The key left in the middle
// gets aesimc applied in place.
// Clobbers %rsi, %rdx, %rdi; xmm0 and xmm1 are zeroed before returning.
David Benjamin56fb43a2024-05-17 21:30:32 -04001907.globl _aes_hw_encrypt_key_to_decrypt_key
1908.private_extern _aes_hw_encrypt_key_to_decrypt_key
David Benjaminfe0c91e2024-03-18 15:37:24 +10001909
1910.p2align 4
David Benjamin56fb43a2024-05-17 21:30:32 -04001911_aes_hw_encrypt_key_to_decrypt_key:
David Benjaminbfcab2a2024-05-12 10:26:48 -04001912
David Benjaminfe0c91e2024-03-18 15:37:24 +10001913_CET_ENDBR
David Benjaminbfcab2a2024-05-12 10:26:48 -04001914
// esi = round counter * 16; rdx = rdi + 16 + esi, the last round key.
David Benjamin56fb43a2024-05-17 21:30:32 -04001915 movl 240(%rdi),%esi
David Benjaminfe0c91e2024-03-18 15:37:24 +10001916 shll $4,%esi
David Benjaminfe0c91e2024-03-18 15:37:24 +10001917
David Benjamin56fb43a2024-05-17 21:30:32 -04001918 leaq 16(%rdi,%rsi,1),%rdx
1919
// Swap the outermost pair without aesimc, then step both cursors inward.
1920 movups (%rdi),%xmm0
1921 movups (%rdx),%xmm1
1922 movups %xmm0,(%rdx)
1923 movups %xmm1,(%rdi)
1924 leaq 16(%rdi),%rdi
1925 leaq -16(%rdx),%rdx
David Benjaminfe0c91e2024-03-18 15:37:24 +10001926
// Inner pairs: aesimc both keys and write each to the other position.
// The cursors are advanced before the stores, hence the 16/-16 offsets.
// Repeats while rdx is still above rdi.
1927L$dec_key_inverse:
David Benjamin56fb43a2024-05-17 21:30:32 -04001928 movups (%rdi),%xmm0
1929 movups (%rdx),%xmm1
David Benjaminfe0c91e2024-03-18 15:37:24 +10001930.byte 102,15,56,219,192
1931.byte 102,15,56,219,201
David Benjamin56fb43a2024-05-17 21:30:32 -04001932 leaq 16(%rdi),%rdi
1933 leaq -16(%rdx),%rdx
1934 movups %xmm0,16(%rdx)
1935 movups %xmm1,-16(%rdi)
1936 cmpq %rdi,%rdx
David Benjaminfe0c91e2024-03-18 15:37:24 +10001937 ja L$dec_key_inverse
1938
// Remaining middle key: load from rdi, aesimc, store through rdx.
// NOTE(review): this assumes the cursors meet on the same key (an odd
// number of round keys) - confirm for every supported round counter.
David Benjamin56fb43a2024-05-17 21:30:32 -04001939 movups (%rdi),%xmm0
David Benjaminfe0c91e2024-03-18 15:37:24 +10001940.byte 102,15,56,219,192
1941 pxor %xmm1,%xmm1
David Benjamin56fb43a2024-05-17 21:30:32 -04001942 movups %xmm0,(%rdx)
David Benjaminfe0c91e2024-03-18 15:37:24 +10001943 pxor %xmm0,%xmm0
David Benjaminfe0c91e2024-03-18 15:37:24 +10001944 ret
1945
David Benjaminbfcab2a2024-05-12 10:26:48 -04001946
// _aes_hw_set_encrypt_key_base: expand a user key into an encryption key
// schedule, using aeskeygenassist (.byte 102,15,58,223,/r,imm8).
//   %rdi = user key bytes, %esi = key size in bits (128, 192 or 256),
//   %rdx = output key schedule.
// Returns 0 in %eax on success, -2 in %rax for any other key size.
// Round keys are written from offset 0 of %rdx; %rax is the write cursor
// used by the L$key_expansion_* helpers. The round counter (9, 11 or 13)
// is stored at offset 240 of %rdx, expressed below relative to the final
// cursor position (80, 48 or 16 bytes past %rax).
// xmm0..xmm5 are zeroed before returning.
David Benjamin962432c2024-05-18 09:47:39 -04001947.globl _aes_hw_set_encrypt_key_base
1948.private_extern _aes_hw_set_encrypt_key_base
David Benjaminfe0c91e2024-03-18 15:37:24 +10001949
1950.p2align 4
David Benjamin962432c2024-05-18 09:47:39 -04001951_aes_hw_set_encrypt_key_base:
David Benjaminfe0c91e2024-03-18 15:37:24 +10001952
David Benjaminbfcab2a2024-05-12 10:26:48 -04001953
David Benjaminfe0c91e2024-03-18 15:37:24 +10001954_CET_ENDBR
1955#ifdef BORINGSSL_DISPATCH_TEST
1956 movb $1,_BORINGSSL_function_hit+3(%rip)
1957#endif
// 8 bytes of stack are reserved for the duration of the function and
// released at L$enc_key_ret (presumably to keep rsp 16-byte aligned at the
// calls below - confirm).
David Benjaminbfcab2a2024-05-12 10:26:48 -04001958 subq $8,%rsp
1959
1960
David Benjaminfe0c91e2024-03-18 15:37:24 +10001961
// xmm0 = first 16 key bytes; xmm4 starts zeroed and is used as scratch by
// the helpers; rax = rdx + 16.
David Benjaminfe0c91e2024-03-18 15:37:24 +10001962 movups (%rdi),%xmm0
1963 xorps %xmm4,%xmm4
David Benjaminfe0c91e2024-03-18 15:37:24 +10001964 leaq 16(%rdx),%rax
1965 cmpl $256,%esi
1966 je L$14rounds
1967 cmpl $192,%esi
1968 je L$12rounds
1969 cmpl $128,%esi
1970 jne L$bad_keybits
1971
// 128-bit key: ten aeskeygenassist steps (xmm0 -> xmm1) with immediates
// 1,2,4,...,128,27,54. The first call enters at the _cold label, which
// skips the store because round key 0 was already written to (%rdx).
1972L$10rounds:
1973 movl $9,%esi
David Benjaminfe0c91e2024-03-18 15:37:24 +10001974
1975 movups %xmm0,(%rdx)
1976.byte 102,15,58,223,200,1
1977 call L$key_expansion_128_cold
1978.byte 102,15,58,223,200,2
1979 call L$key_expansion_128
1980.byte 102,15,58,223,200,4
1981 call L$key_expansion_128
1982.byte 102,15,58,223,200,8
1983 call L$key_expansion_128
1984.byte 102,15,58,223,200,16
1985 call L$key_expansion_128
1986.byte 102,15,58,223,200,32
1987 call L$key_expansion_128
1988.byte 102,15,58,223,200,64
1989 call L$key_expansion_128
1990.byte 102,15,58,223,200,128
1991 call L$key_expansion_128
1992.byte 102,15,58,223,200,27
1993 call L$key_expansion_128
1994.byte 102,15,58,223,200,54
1995 call L$key_expansion_128
// Store the last round key and the round counter; return 0.
1996 movups %xmm0,(%rax)
1997 movl %esi,80(%rax)
1998 xorl %eax,%eax
1999 jmp L$enc_key_ret
2000
// 192-bit key: the extra 8 key bytes go into the low half of xmm2.
// aeskeygenassist reads xmm2 here; the 192a/192b helpers alternate because
// each step yields one and a half round keys.
2001.p2align 4
David Benjaminfe0c91e2024-03-18 15:37:24 +10002002L$12rounds:
2003 movq 16(%rdi),%xmm2
2004 movl $11,%esi
David Benjaminfe0c91e2024-03-18 15:37:24 +10002005
2006 movups %xmm0,(%rdx)
2007.byte 102,15,58,223,202,1
2008 call L$key_expansion_192a_cold
2009.byte 102,15,58,223,202,2
2010 call L$key_expansion_192b
2011.byte 102,15,58,223,202,4
2012 call L$key_expansion_192a
2013.byte 102,15,58,223,202,8
2014 call L$key_expansion_192b
2015.byte 102,15,58,223,202,16
2016 call L$key_expansion_192a
2017.byte 102,15,58,223,202,32
2018 call L$key_expansion_192b
2019.byte 102,15,58,223,202,64
2020 call L$key_expansion_192a
2021.byte 102,15,58,223,202,128
2022 call L$key_expansion_192b
2023 movups %xmm0,(%rax)
2024 movl %esi,48(%rax)
2025 xorq %rax,%rax
2026 jmp L$enc_key_ret
2027
// 256-bit key: xmm0/xmm2 hold the two key halves, both stored up front, so
// the cursor starts at rdx + 32. The 256a helper updates xmm0 (keygenassist
// on xmm2), the 256b helper updates xmm2 (keygenassist on xmm0).
2028.p2align 4
David Benjaminfe0c91e2024-03-18 15:37:24 +10002029L$14rounds:
2030 movups 16(%rdi),%xmm2
2031 movl $13,%esi
2032 leaq 16(%rax),%rax
David Benjaminfe0c91e2024-03-18 15:37:24 +10002033
2034 movups %xmm0,(%rdx)
2035 movups %xmm2,16(%rdx)
2036.byte 102,15,58,223,202,1
2037 call L$key_expansion_256a_cold
2038.byte 102,15,58,223,200,1
2039 call L$key_expansion_256b
2040.byte 102,15,58,223,202,2
2041 call L$key_expansion_256a
2042.byte 102,15,58,223,200,2
2043 call L$key_expansion_256b
2044.byte 102,15,58,223,202,4
2045 call L$key_expansion_256a
2046.byte 102,15,58,223,200,4
2047 call L$key_expansion_256b
2048.byte 102,15,58,223,202,8
2049 call L$key_expansion_256a
2050.byte 102,15,58,223,200,8
2051 call L$key_expansion_256b
2052.byte 102,15,58,223,202,16
2053 call L$key_expansion_256a
2054.byte 102,15,58,223,200,16
2055 call L$key_expansion_256b
2056.byte 102,15,58,223,202,32
2057 call L$key_expansion_256a
2058.byte 102,15,58,223,200,32
2059 call L$key_expansion_256b
2060.byte 102,15,58,223,202,64
2061 call L$key_expansion_256a
2062 movups %xmm0,(%rax)
2063 movl %esi,16(%rax)
2064 xorq %rax,%rax
2065 jmp L$enc_key_ret
2066
// Unsupported key size: return -2. Falls through into the common exit.
2067.p2align 4
David Benjaminfe0c91e2024-03-18 15:37:24 +10002068L$bad_keybits:
2069 movq $-2,%rax
// Common exit: zero every xmm register this routine and its helpers used
// (presumably to avoid leaving key material behind), release the stack
// slot, return.
2070L$enc_key_ret:
2071 pxor %xmm0,%xmm0
2072 pxor %xmm1,%xmm1
2073 pxor %xmm2,%xmm2
2074 pxor %xmm3,%xmm3
2075 pxor %xmm4,%xmm4
2076 pxor %xmm5,%xmm5
2077 addq $8,%rsp
2078
2079 ret
2080
David Benjaminbfcab2a2024-05-12 10:26:48 -04002081
David Benjaminfe0c91e2024-03-18 15:37:24 +10002082
// L$key_expansion_128: one key-schedule step for 128-bit keys.
// In:  xmm0 = current round key, xmm1 = aeskeygenassist output,
//      rax = write cursor, xmm4 = scratch carried between calls.
// Stores xmm0 at (%rax) and advances rax by 16, then updates xmm0: two
// shufps/xorps pairs fold shifted copies of xmm0 into itself via xmm4, and
// the result is xored with dword 3 of xmm1 broadcast to all lanes.
// The _cold entry skips the store and cursor advance.
2083.p2align 4
2084L$key_expansion_128:
Adam Langley6ee4f9d2024-08-09 10:00:54 -07002085
David Benjaminfe0c91e2024-03-18 15:37:24 +10002086 movups %xmm0,(%rax)
2087 leaq 16(%rax),%rax
2088L$key_expansion_128_cold:
2089 shufps $16,%xmm0,%xmm4
2090 xorps %xmm4,%xmm0
2091 shufps $140,%xmm0,%xmm4
2092 xorps %xmm4,%xmm0
2093 shufps $255,%xmm1,%xmm1
2094 xorps %xmm1,%xmm0
2095 ret
2096
Adam Langley6ee4f9d2024-08-09 10:00:54 -07002097
// L$key_expansion_192a: key-schedule step for 192-bit keys, variant that
// stores one 16-byte round key.
// In:  xmm0 = first four key words, xmm2 = remaining two words (low half),
//      xmm1 = aeskeygenassist output, rax = write cursor, xmm4 = scratch.
// Stores xmm0 at (%rax), advances rax by 16, saves xmm2 in xmm5 for the
// following 192b call, then updates xmm0 and xmm2.
// Entry points: _192a_cold skips the store; _192b_warm (jumped to from
// L$key_expansion_192b) additionally skips the xmm5 save.
// Clobbers xmm3.
David Benjaminfe0c91e2024-03-18 15:37:24 +10002098.p2align 4
2099L$key_expansion_192a:
Adam Langley6ee4f9d2024-08-09 10:00:54 -07002100
David Benjaminfe0c91e2024-03-18 15:37:24 +10002101 movups %xmm0,(%rax)
2102 leaq 16(%rax),%rax
2103L$key_expansion_192a_cold:
2104 movaps %xmm2,%xmm5
2105L$key_expansion_192b_warm:
2106 shufps $16,%xmm0,%xmm4
2107 movdqa %xmm2,%xmm3
2108 xorps %xmm4,%xmm0
2109 shufps $140,%xmm0,%xmm4
2110 pslldq $4,%xmm3
2111 xorps %xmm4,%xmm0
// Broadcast dword 1 of the keygenassist result and fold it into xmm0.
2112 pshufd $85,%xmm1,%xmm1
2113 pxor %xmm3,%xmm2
2114 pxor %xmm1,%xmm0
// Propagate the new top word of xmm0 into xmm2.
2115 pshufd $255,%xmm0,%xmm3
2116 pxor %xmm3,%xmm2
2117 ret
2118
Adam Langley6ee4f9d2024-08-09 10:00:54 -07002119
// L$key_expansion_192b: key-schedule step for 192-bit keys, variant that
// stores two 16-byte round keys (32 bytes) before computing the next step.
// The two stored keys are assembled with shufps from xmm5 (saved by the
// preceding 192a step), xmm0 and xmm2. rax is advanced by 32, then control
// jumps into the shared computation at L$key_expansion_192b_warm, whose
// ret returns to this helper's caller.
// Clobbers xmm3 and xmm5.
David Benjaminfe0c91e2024-03-18 15:37:24 +10002120.p2align 4
2121L$key_expansion_192b:
Adam Langley6ee4f9d2024-08-09 10:00:54 -07002122
David Benjaminfe0c91e2024-03-18 15:37:24 +10002123 movaps %xmm0,%xmm3
2124 shufps $68,%xmm0,%xmm5
2125 movups %xmm5,(%rax)
2126 shufps $78,%xmm2,%xmm3
2127 movups %xmm3,16(%rax)
2128 leaq 32(%rax),%rax
2129 jmp L$key_expansion_192b_warm
2130
Adam Langley6ee4f9d2024-08-09 10:00:54 -07002131
// L$key_expansion_256a: key-schedule step for 256-bit keys that updates
// the xmm0 half.
// Stores xmm2 (the other half, produced by the preceding 256b step) at
// (%rax) and advances rax by 16; the _cold entry skips that store.
// xmm0 is then updated exactly as in the 128-bit step: folded with shifted
// copies of itself via xmm4 and xored with broadcast dword 3 of xmm1.
David Benjaminfe0c91e2024-03-18 15:37:24 +10002132.p2align 4
2133L$key_expansion_256a:
Adam Langley6ee4f9d2024-08-09 10:00:54 -07002134
David Benjaminfe0c91e2024-03-18 15:37:24 +10002135 movups %xmm2,(%rax)
2136 leaq 16(%rax),%rax
2137L$key_expansion_256a_cold:
2138 shufps $16,%xmm0,%xmm4
2139 xorps %xmm4,%xmm0
2140 shufps $140,%xmm0,%xmm4
2141 xorps %xmm4,%xmm0
2142 shufps $255,%xmm1,%xmm1
2143 xorps %xmm1,%xmm0
2144 ret
2145
Adam Langley6ee4f9d2024-08-09 10:00:54 -07002146
// L$key_expansion_256b: key-schedule step for 256-bit keys that updates
// the xmm2 half.
// Stores xmm0 (produced by the preceding 256a step) at (%rax) and advances
// rax by 16. xmm2 is then folded with shifted copies of itself via xmm4
// and xored with dword 2 of xmm1 broadcast to all lanes (shufps $170),
// whereas the 256a step uses dword 3.
David Benjaminfe0c91e2024-03-18 15:37:24 +10002147.p2align 4
2148L$key_expansion_256b:
Adam Langley6ee4f9d2024-08-09 10:00:54 -07002149
David Benjaminfe0c91e2024-03-18 15:37:24 +10002150 movups %xmm0,(%rax)
2151 leaq 16(%rax),%rax
2152
2153 shufps $16,%xmm2,%xmm4
2154 xorps %xmm4,%xmm2
2155 shufps $140,%xmm2,%xmm4
2156 xorps %xmm4,%xmm2
2157 shufps $170,%xmm1,%xmm1
2158 xorps %xmm1,%xmm2
2159 ret
2160
David Benjamin962432c2024-05-18 09:47:39 -04002161
Adam Langley6ee4f9d2024-08-09 10:00:54 -07002162
// _aes_hw_set_encrypt_key_alt: same interface as
// _aes_hw_set_encrypt_key_base, but without aeskeygenassist.
//   %rdi = user key bytes, %esi = key size in bits (128, 192 or 256),
//   %rdx = output key schedule.
// Returns 0 in %eax on success, -2 in %rax for any other key size.
// Each step selects a key word with pshufb (.byte 102,15,56,0,/r; mask in
// xmm5, loaded from L$key_rotate or L$key_rotate192) and runs aesenclast
// (.byte 102,15,56,221,/r) on it with the round constant in xmm4 as the
// "round key". xmm4 is doubled with pslld after each use.
// The round counter (9, 11 or 13) ends up at offset 240 of %rdx.
// Clobbers %r10d; xmm0..xmm5 are zeroed before returning.
David Benjamin962432c2024-05-18 09:47:39 -04002163.globl _aes_hw_set_encrypt_key_alt
2164.private_extern _aes_hw_set_encrypt_key_alt
2165
2166.p2align 4
2167_aes_hw_set_encrypt_key_alt:
2168
2169
2170_CET_ENDBR
2171#ifdef BORINGSSL_DISPATCH_TEST
2172 movb $1,_BORINGSSL_function_hit+3(%rip)
2173#endif
2174 subq $8,%rsp
2175
2176
2177
2178 movups (%rdi),%xmm0
2179 xorps %xmm4,%xmm4
2180 leaq 16(%rdx),%rax
2181 cmpl $256,%esi
2182 je L$14rounds_alt
2183 cmpl $192,%esi
2184 je L$12rounds_alt
2185 cmpl $128,%esi
2186 jne L$bad_keybits_alt
2187
// 128-bit key. xmm2 mirrors the previous round key; r10d counts the eight
// loop iterations that use round constants 1,2,4,...,128.
2188 movl $9,%esi
2189 movdqa L$key_rotate(%rip),%xmm5
2190 movl $8,%r10d
2191 movdqa L$key_rcon1(%rip),%xmm4
2192 movdqa %xmm0,%xmm2
2193 movdqu %xmm0,(%rdx)
2194 jmp L$oop_key128
2195
2196.p2align 4
2197L$oop_key128:
2198.byte 102,15,56,0,197
2199.byte 102,15,56,221,196
2200 pslld $1,%xmm4
2201 leaq 16(%rax),%rax
2202
// xmm2 = previous key xored with itself shifted left by 4, 8 and 12 bytes
// (running xor of its words); xmm3 is scratch.
2203 movdqa %xmm2,%xmm3
2204 pslldq $4,%xmm2
2205 pxor %xmm2,%xmm3
2206 pslldq $4,%xmm2
2207 pxor %xmm2,%xmm3
2208 pslldq $4,%xmm2
2209 pxor %xmm3,%xmm2
2210
2211 pxor %xmm2,%xmm0
2212 movdqu %xmm0,-16(%rax)
2213 movdqa %xmm0,%xmm2
2214
2215 decl %r10d
2216 jnz L$oop_key128
2217
// The last two round keys are generated outside the loop: the constant is
// reloaded as 0x1b from L$key_rcon1b, and the pslld below turns it into
// 0x36 for the final step.
2218 movdqa L$key_rcon1b(%rip),%xmm4
2219
2220.byte 102,15,56,0,197
2221.byte 102,15,56,221,196
2222 pslld $1,%xmm4
2223
2224 movdqa %xmm2,%xmm3
2225 pslldq $4,%xmm2
2226 pxor %xmm2,%xmm3
2227 pslldq $4,%xmm2
2228 pxor %xmm2,%xmm3
2229 pslldq $4,%xmm2
2230 pxor %xmm3,%xmm2
2231
2232 pxor %xmm2,%xmm0
2233 movdqu %xmm0,(%rax)
2234
2235 movdqa %xmm0,%xmm2
2236.byte 102,15,56,0,197
2237.byte 102,15,56,221,196
2238
2239 movdqa %xmm2,%xmm3
2240 pslldq $4,%xmm2
2241 pxor %xmm2,%xmm3
2242 pslldq $4,%xmm2
2243 pxor %xmm2,%xmm3
2244 pslldq $4,%xmm2
2245 pxor %xmm3,%xmm2
2246
2247 pxor %xmm2,%xmm0
2248 movdqu %xmm0,16(%rax)
2249
// rax is rdx + 144 here, so 96(%rax) is offset 240 of the schedule.
2250 movl %esi,96(%rax)
2251 xorl %eax,%eax
2252 jmp L$enc_key_ret_alt
2253
// 192-bit key: xmm0 = first four words, low half of xmm2 = last two words.
// Each of the eight iterations writes 24 bytes: 8 from xmm2 at the cursor,
// then, after advancing rax by 24, 16 from the updated xmm0 at -16(%rax).
2254.p2align 4
2255L$12rounds_alt:
2256 movq 16(%rdi),%xmm2
2257 movl $11,%esi
2258 movdqa L$key_rotate192(%rip),%xmm5
2259 movdqa L$key_rcon1(%rip),%xmm4
2260 movl $8,%r10d
2261 movdqu %xmm0,(%rdx)
2262 jmp L$oop_key192
2263
2264.p2align 4
2265L$oop_key192:
2266 movq %xmm2,0(%rax)
2267 movdqa %xmm2,%xmm1
2268.byte 102,15,56,0,213
2269.byte 102,15,56,221,212
2270 pslld $1,%xmm4
2271 leaq 24(%rax),%rax
2272
2273 movdqa %xmm0,%xmm3
2274 pslldq $4,%xmm0
2275 pxor %xmm0,%xmm3
2276 pslldq $4,%xmm0
2277 pxor %xmm0,%xmm3
2278 pslldq $4,%xmm0
2279 pxor %xmm3,%xmm0
2280
2281 pshufd $0xff,%xmm0,%xmm3
2282 pxor %xmm1,%xmm3
2283 pslldq $4,%xmm1
2284 pxor %xmm1,%xmm3
2285
2286 pxor %xmm2,%xmm0
2287 pxor %xmm3,%xmm2
2288 movdqu %xmm0,-16(%rax)
2289
2290 decl %r10d
2291 jnz L$oop_key192
2292
// rax is rdx + 208 here, so 32(%rax) is offset 240 of the schedule.
2293 movl %esi,32(%rax)
2294 xorl %eax,%eax
2295 jmp L$enc_key_ret_alt
2296
// 256-bit key: xmm0/xmm2 = the two key halves, both stored up front; xmm1
// mirrors the second half; the cursor starts at rdx + 32. r10d = 7 passes.
2297.p2align 4
2298L$14rounds_alt:
2299 movups 16(%rdi),%xmm2
2300 movl $13,%esi
2301 leaq 16(%rax),%rax
2302 movdqa L$key_rotate(%rip),%xmm5
2303 movdqa L$key_rcon1(%rip),%xmm4
2304 movl $7,%r10d
2305 movdqu %xmm0,0(%rdx)
2306 movdqa %xmm2,%xmm1
2307 movdqu %xmm2,16(%rdx)
2308 jmp L$oop_key256
2309
2310.p2align 4
2311L$oop_key256:
// First half of the pass: next even-numbered round key into xmm0,
// stored at (%rax).
2312.byte 102,15,56,0,213
2313.byte 102,15,56,221,212
2314
2315 movdqa %xmm0,%xmm3
2316 pslldq $4,%xmm0
2317 pxor %xmm0,%xmm3
2318 pslldq $4,%xmm0
2319 pxor %xmm0,%xmm3
2320 pslldq $4,%xmm0
2321 pxor %xmm3,%xmm0
2322 pslld $1,%xmm4
2323
2324 pxor %xmm2,%xmm0
2325 movdqu %xmm0,(%rax)
2326
// The seventh pass stops here, after the last round key has been written.
2327 decl %r10d
2328 jz L$done_key256
2329
// Second half: broadcast the top word of xmm0 and run aesenclast on it
// with an all-zero xmm3, i.e. no rotation mask and no round constant.
2330 pshufd $0xff,%xmm0,%xmm2
2331 pxor %xmm3,%xmm3
2332.byte 102,15,56,221,211
2333
2334 movdqa %xmm1,%xmm3
2335 pslldq $4,%xmm1
2336 pxor %xmm1,%xmm3
2337 pslldq $4,%xmm1
2338 pxor %xmm1,%xmm3
2339 pslldq $4,%xmm1
2340 pxor %xmm3,%xmm1
2341
2342 pxor %xmm1,%xmm2
2343 movdqu %xmm2,16(%rax)
2344 leaq 32(%rax),%rax
2345 movdqa %xmm2,%xmm1
2346
2347 jmp L$oop_key256
2348
// rax is rdx + 224 here, so 16(%rax) is offset 240 of the schedule.
2349L$done_key256:
2350 movl %esi,16(%rax)
2351 xorl %eax,%eax
2352 jmp L$enc_key_ret_alt
2353
// Unsupported key size: return -2. Falls through into the common exit,
// which zeroes xmm0..xmm5 and releases the 8-byte stack slot.
2354.p2align 4
2355L$bad_keybits_alt:
2356 movq $-2,%rax
2357L$enc_key_ret_alt:
2358 pxor %xmm0,%xmm0
2359 pxor %xmm1,%xmm1
2360 pxor %xmm2,%xmm2
2361 pxor %xmm3,%xmm3
2362 pxor %xmm4,%xmm4
2363 pxor %xmm5,%xmm5
2364 addq $8,%rsp
2365
2366 ret
2367
2368
2369
// Read-only constant pool, 64-byte aligned. Each entry is 16 bytes.
// Within this part of the file only L$key_rotate, L$key_rotate192,
// L$key_rcon1 and L$key_rcon1b are referenced (by
// _aes_hw_set_encrypt_key_alt); the other entries are presumably used by
// routines earlier in the file - confirm before changing any of them.
David Benjaminfe0c91e2024-03-18 15:37:24 +10002370.section __DATA,__const
2371.p2align 6
// Byte indices 15..0: as a pshufb mask this reverses a 16-byte vector.
2372L$bswap_mask:
2373.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
2374L$increment32:
2375.long 6,6,6,0
2376L$increment64:
2377.long 1,0,0,0
2378L$xts_magic:
2379.long 0x87,0,1,0
2380L$increment1:
2381.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
// pshufb masks for the _alt key schedule: every output dword selects source
// bytes 13,14,15,12 (L$key_rotate) or 5,6,7,4 (L$key_rotate192).
2382L$key_rotate:
2383.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
2384L$key_rotate192:
2385.long 0x04070605,0x04070605,0x04070605,0x04070605
// Round-constant seeds for the _alt key schedule, replicated per dword:
// 1 for the doubling sequence, 0x1b for the point where the 128-bit
// schedule reloads the constant.
2386L$key_rcon1:
2387.long 1,1,1,1
2388L$key_rcon1b:
2389.long 0x1b,0x1b,0x1b,0x1b
2390
// NUL-terminated ASCII identification string:
// "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>"
2391.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
2392.p2align 6
2393.text
2394#endif