// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
// Project assembly prelude. NOTE(review): presumably supplies the OPENSSL_*
// macros tested below - confirm against the header.
#include <openssl/asm_base.h>

// Everything up to the matching #endif is only assembled for 32-bit x86
// Apple targets when assembly has not been disabled.
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)
.text
// ---------------------------------------------------------------------------
// _ChaCha20_ctr32_nohw: ChaCha20 using only general-purpose registers.
//
// 32-bit x86, AT&T syntax, all arguments on the stack. Argument roles are
// read off how each slot is used by this routine:
//   arg1  destination pointer (stored through)
//   arg2  source pointer (XORed from)
//   arg3  byte count
//         NOTE(review): must be non-zero. A zero count reaches the byte-wise
//         tail loop, whose decl/jnz would wrap. Confirm callers guarantee it.
//   arg4  pointer to 8 dwords of key
//   arg5  pointer to 4 dwords of counter/nonce; dword 0 is the block counter
//
// Frame after "subl $132,%esp":
//       0..60(%esp)   working state words x0..x15
//      80..108(%esp)  copy of the 8 key dwords
//     112..124(%esp)  copy of the 4 counter/nonce dwords
//     128(%esp)       spilled round counter
//     152/156/160(%esp)  arg1/arg2/arg3, updated in place per 64-byte block
//
// ebp, ebx, esi and edi are saved here and restored at L004done.
// ---------------------------------------------------------------------------
.globl _ChaCha20_ctr32_nohw
.private_extern _ChaCha20_ctr32_nohw
.align 4
_ChaCha20_ctr32_nohw:
L_ChaCha20_ctr32_nohw_begin:
    pushl %ebp
    pushl %ebx
    pushl %esi
    pushl %edi
    // Four pushes plus the return address: arg4 is at 32(%esp), arg5 at 36.
    movl 32(%esp),%esi
    movl 36(%esp),%edi
    subl $132,%esp
    // Copy key dwords 0..3 into the frame.
    movl (%esi),%eax
    movl 4(%esi),%ebx
    movl 8(%esi),%ecx
    movl 12(%esi),%edx
    movl %eax,80(%esp)
    movl %ebx,84(%esp)
    movl %ecx,88(%esp)
    movl %edx,92(%esp)
    // Copy key dwords 4..7 into the frame.
    movl 16(%esi),%eax
    movl 20(%esi),%ebx
    movl 24(%esi),%ecx
    movl 28(%esi),%edx
    movl %eax,96(%esp)
    movl %ebx,100(%esp)
    movl %ecx,104(%esp)
    movl %edx,108(%esp)
    // Copy the counter/nonce block.
    movl (%edi),%eax
    movl 4(%edi),%ebx
    movl 8(%edi),%ecx
    movl 12(%edi),%edx
    // The per-block setup at L000entry increments the counter before using
    // it, so store it pre-decremented.
    subl $1,%eax
    movl %eax,112(%esp)
    movl %ebx,116(%esp)
    movl %ecx,120(%esp)
    movl %edx,124(%esp)
    jmp L000entry
// Per-block setup for the scalar path.
// L001outer_loop is reached after a full 64-byte block with eax = advanced
// destination, ebx = advanced source and ecx = remaining byte count; they are
// written back over the argument slots. L000entry (also the first-time entry
// point) rebuilds the working state x0..x15 at 0..60(%esp) from the frame
// copies of the key (80..108) and counter/nonce (112..124).
// On exit to L002loop: eax = x0, ebp = x4, ecx = x8, esi = x9, edx = x12,
// ebx = 10 (loop iterations); the other words live in their stack slots.
.align 4,0x90
L001outer_loop:
    movl %ebx,156(%esp)
    movl %eax,152(%esp)
    movl %ecx,160(%esp)
L000entry:
    // x0..x3: the four ChaCha constant words (ASCII "expand 32-byte k").
    movl $1634760805,%eax
    movl $857760878,4(%esp)
    movl $2036477234,8(%esp)
    movl $1797285236,12(%esp)
    movl 84(%esp),%ebx
    movl 88(%esp),%ebp
    movl 104(%esp),%ecx
    movl 108(%esp),%esi
    movl 116(%esp),%edx
    movl 120(%esp),%edi
    // x5, x6, x10, x11, x13, x14
    movl %ebx,20(%esp)
    movl %ebp,24(%esp)
    movl %ecx,40(%esp)
    movl %esi,44(%esp)
    movl %edx,52(%esp)
    movl %edi,56(%esp)
    movl 92(%esp),%ebx
    movl 124(%esp),%edi
    movl 112(%esp),%edx
    movl 80(%esp),%ebp
    movl 96(%esp),%ecx
    movl 100(%esp),%esi
    // Advance the block counter; the new value is both x12 for this block
    // and the stored counter for the next one.
    addl $1,%edx
    movl %ebx,28(%esp)
    movl %edi,60(%esp)
    movl %edx,112(%esp)
    movl $10,%ebx
    jmp L002loop
// Scalar round loop: each iteration performs four column quarter-rounds
// followed by four diagonal quarter-rounds; ebx counts iterations and is
// spilled to 128(%esp) while it is reused as a working register.
// A quarter-round on (a,b,c,d) is:
//     a += b; d ^= a; d <<<= 16;   c += d; b ^= c; b <<<= 12;
//     a += b; d ^= a; d <<<= 8;    c += d; b ^= c; b <<<= 7;
// Loads and stores for the next quarter-round are interleaved with the
// current one, so the markers below show where each one starts.
// Live at loop top and bottom: eax = x0, ebp = x4, ecx = x8, esi = x9,
// edx = x12.
.align 4,0x90
L002loop:
    // column: (x0, x4, x8, x12)
    addl %ebp,%eax
    movl %ebx,128(%esp)
    movl %ebp,%ebx
    xorl %eax,%edx
    roll $16,%edx
    addl %edx,%ecx
    xorl %ecx,%ebx
    movl 52(%esp),%edi
    roll $12,%ebx
    movl 20(%esp),%ebp
    addl %ebx,%eax
    xorl %eax,%edx
    movl %eax,(%esp)
    roll $8,%edx
    movl 4(%esp),%eax
    addl %edx,%ecx
    movl %edx,48(%esp)
    xorl %ecx,%ebx
    // column: (x1, x5, x9, x13)
    addl %ebp,%eax
    roll $7,%ebx
    xorl %eax,%edi
    movl %ecx,32(%esp)
    roll $16,%edi
    movl %ebx,16(%esp)
    addl %edi,%esi
    movl 40(%esp),%ecx
    xorl %esi,%ebp
    movl 56(%esp),%edx
    roll $12,%ebp
    movl 24(%esp),%ebx
    addl %ebp,%eax
    xorl %eax,%edi
    movl %eax,4(%esp)
    roll $8,%edi
    movl 8(%esp),%eax
    addl %edi,%esi
    movl %edi,52(%esp)
    xorl %esi,%ebp
    // column: (x2, x6, x10, x14)
    addl %ebx,%eax
    roll $7,%ebp
    xorl %eax,%edx
    movl %esi,36(%esp)
    roll $16,%edx
    movl %ebp,20(%esp)
    addl %edx,%ecx
    movl 44(%esp),%esi
    xorl %ecx,%ebx
    movl 60(%esp),%edi
    roll $12,%ebx
    movl 28(%esp),%ebp
    addl %ebx,%eax
    xorl %eax,%edx
    movl %eax,8(%esp)
    roll $8,%edx
    movl 12(%esp),%eax
    addl %edx,%ecx
    movl %edx,56(%esp)
    xorl %ecx,%ebx
    // column: (x3, x7, x11, x15)
    addl %ebp,%eax
    roll $7,%ebx
    xorl %eax,%edi
    roll $16,%edi
    movl %ebx,24(%esp)
    addl %edi,%esi
    xorl %esi,%ebp
    roll $12,%ebp
    movl 20(%esp),%ebx
    addl %ebp,%eax
    xorl %eax,%edi
    movl %eax,12(%esp)
    roll $8,%edi
    movl (%esp),%eax
    addl %edi,%esi
    movl %edi,%edx
    xorl %esi,%ebp
    // diagonal: (x0, x5, x10, x15)
    addl %ebx,%eax
    roll $7,%ebp
    xorl %eax,%edx
    roll $16,%edx
    movl %ebp,28(%esp)
    addl %edx,%ecx
    xorl %ecx,%ebx
    movl 48(%esp),%edi
    roll $12,%ebx
    movl 24(%esp),%ebp
    addl %ebx,%eax
    xorl %eax,%edx
    movl %eax,(%esp)
    roll $8,%edx
    movl 4(%esp),%eax
    addl %edx,%ecx
    movl %edx,60(%esp)
    xorl %ecx,%ebx
    // diagonal: (x1, x6, x11, x12)
    addl %ebp,%eax
    roll $7,%ebx
    xorl %eax,%edi
    movl %ecx,40(%esp)
    roll $16,%edi
    movl %ebx,20(%esp)
    addl %edi,%esi
    movl 32(%esp),%ecx
    xorl %esi,%ebp
    movl 52(%esp),%edx
    roll $12,%ebp
    movl 28(%esp),%ebx
    addl %ebp,%eax
    xorl %eax,%edi
    movl %eax,4(%esp)
    roll $8,%edi
    movl 8(%esp),%eax
    addl %edi,%esi
    movl %edi,48(%esp)
    xorl %esi,%ebp
    // diagonal: (x2, x7, x8, x13)
    addl %ebx,%eax
    roll $7,%ebp
    xorl %eax,%edx
    movl %esi,44(%esp)
    roll $16,%edx
    movl %ebp,24(%esp)
    addl %edx,%ecx
    movl 36(%esp),%esi
    xorl %ecx,%ebx
    movl 56(%esp),%edi
    roll $12,%ebx
    movl 16(%esp),%ebp
    addl %ebx,%eax
    xorl %eax,%edx
    movl %eax,8(%esp)
    roll $8,%edx
    movl 12(%esp),%eax
    addl %edx,%ecx
    movl %edx,52(%esp)
    xorl %ecx,%ebx
    // diagonal: (x3, x4, x9, x14)
    addl %ebp,%eax
    roll $7,%ebx
    xorl %eax,%edi
    roll $16,%edi
    movl %ebx,28(%esp)
    addl %edi,%esi
    xorl %esi,%ebp
    movl 48(%esp),%edx
    roll $12,%ebp
    // Reload the spilled iteration counter.
    movl 128(%esp),%ebx
    addl %ebp,%eax
    xorl %eax,%edi
    movl %eax,12(%esp)
    roll $8,%edi
    movl (%esp),%eax
    addl %edi,%esi
    movl %edi,56(%esp)
    xorl %esi,%ebp
    roll $7,%ebp
    decl %ebx
    jnz L002loop
// Scalar path, after the rounds: add the input state back in (feed-forward)
// and, when at least 64 bytes remain, XOR one whole keystream block with the
// source and store it to the destination. Fewer than 64 bytes branches to
// L003tail instead.
// On entry: eax = x0, ebp = x4, ecx = x8, esi = x9, edx = x12, edi = x14;
// all other words are in their slots at 0..60(%esp).
    movl 160(%esp),%ebx
    addl $1634760805,%eax
    addl 80(%esp),%ebp
    addl 96(%esp),%ecx
    addl 100(%esp),%esi
    // ebx = remaining byte count (unsigned compare).
    cmpl $64,%ebx
    jb L003tail
    // ebx = source pointer from here on.
    movl 156(%esp),%ebx
    addl 112(%esp),%edx
    addl 120(%esp),%edi
    xorl (%ebx),%eax
    xorl 16(%ebx),%ebp
    // Park output word 0 on the stack; eax is needed for the destination
    // pointer, and word 0 is written out last.
    movl %eax,(%esp)
    movl 152(%esp),%eax
    xorl 32(%ebx),%ecx
    xorl 36(%ebx),%esi
    xorl 48(%ebx),%edx
    xorl 56(%ebx),%edi
    movl %ebp,16(%eax)
    movl %ecx,32(%eax)
    movl %esi,36(%eax)
    movl %edx,48(%eax)
    movl %edi,56(%eax)
    // Words 1, 2, 3, 5, 6.
    movl 4(%esp),%ebp
    movl 8(%esp),%ecx
    movl 12(%esp),%esi
    movl 20(%esp),%edx
    movl 24(%esp),%edi
    addl $857760878,%ebp
    addl $2036477234,%ecx
    addl $1797285236,%esi
    addl 84(%esp),%edx
    addl 88(%esp),%edi
    xorl 4(%ebx),%ebp
    xorl 8(%ebx),%ecx
    xorl 12(%ebx),%esi
    xorl 20(%ebx),%edx
    xorl 24(%ebx),%edi
    movl %ebp,4(%eax)
    movl %ecx,8(%eax)
    movl %esi,12(%eax)
    movl %edx,20(%eax)
    movl %edi,24(%eax)
    // Words 7, 10, 11, 13, 15.
    movl 28(%esp),%ebp
    movl 40(%esp),%ecx
    movl 44(%esp),%esi
    movl 52(%esp),%edx
    movl 60(%esp),%edi
    addl 92(%esp),%ebp
    addl 104(%esp),%ecx
    addl 108(%esp),%esi
    addl 116(%esp),%edx
    addl 124(%esp),%edi
    xorl 28(%ebx),%ebp
    xorl 40(%ebx),%ecx
    xorl 44(%ebx),%esi
    xorl 52(%ebx),%edx
    xorl 60(%ebx),%edi
    // Advance the source pointer by one block.
    leal 64(%ebx),%ebx
    movl %ebp,28(%eax)
    movl (%esp),%ebp
    movl %ecx,40(%eax)
    movl 160(%esp),%ecx
    movl %esi,44(%eax)
    movl %edx,52(%eax)
    movl %edi,60(%eax)
    // Finally write the parked word 0.
    movl %ebp,(%eax)
    // Advance the destination pointer and consume 64 bytes of the count.
    leal 64(%eax),%eax
    subl $64,%ecx
    jnz L001outer_loop
    jmp L004done
// Scalar path, final partial block (fewer than 64 bytes left, count in ebx).
// The complete keystream block is materialised at 0..60(%esp) and then XORed
// into the output one byte at a time. L004done is the common epilogue.
// On entry: eax = x0, ebp = x4, ecx = x8, esi = x9 (input state already
// added), edx = x12 and edi = x14 (input state not yet added).
L003tail:
    addl 112(%esp),%edx
    addl 120(%esp),%edi
    movl %eax,(%esp)
    movl %ebp,16(%esp)
    movl %ecx,32(%esp)
    movl %esi,36(%esp)
    movl %edx,48(%esp)
    movl %edi,56(%esp)
    // Words 1, 2, 3, 5, 6: add constants / key and store back.
    movl 4(%esp),%ebp
    movl 8(%esp),%ecx
    movl 12(%esp),%esi
    movl 20(%esp),%edx
    movl 24(%esp),%edi
    addl $857760878,%ebp
    addl $2036477234,%ecx
    addl $1797285236,%esi
    addl 84(%esp),%edx
    addl 88(%esp),%edi
    movl %ebp,4(%esp)
    movl %ecx,8(%esp)
    movl %esi,12(%esp)
    movl %edx,20(%esp)
    movl %edi,24(%esp)
    // Words 7, 10, 11, 13, 15.
    movl 28(%esp),%ebp
    movl 40(%esp),%ecx
    movl 44(%esp),%esi
    movl 52(%esp),%edx
    movl 60(%esp),%edi
    addl 92(%esp),%ebp
    addl 104(%esp),%ecx
    addl 108(%esp),%esi
    addl 116(%esp),%edx
    addl 124(%esp),%edi
    movl %ebp,28(%esp)
    // ebp = source pointer, ecx = destination pointer, esi = byte index.
    movl 156(%esp),%ebp
    movl %ecx,40(%esp)
    movl 152(%esp),%ecx
    movl %esi,44(%esp)
    xorl %esi,%esi
    movl %edx,52(%esp)
    movl %edi,60(%esp)
    xorl %eax,%eax
    xorl %edx,%edx
L005tail_loop:
    // al = source byte, dl = keystream byte.
    movb (%esi,%ebp,1),%al
    movb (%esp,%esi,1),%dl
    leal 1(%esi),%esi
    xorb %dl,%al
    // esi was already incremented, hence the -1 displacement.
    movb %al,-1(%ecx,%esi,1)
    decl %ebx
    jnz L005tail_loop
L004done:
    // Release the frame and restore the registers pushed at entry.
    addl $132,%esp
    popl %edi
    popl %esi
    popl %ebx
    popl %ebp
    ret
// ---------------------------------------------------------------------------
// _ChaCha20_ctr32_ssse3: ChaCha20 using SSE2/SSSE3 (pshufb) instructions.
//
// 32-bit x86, AT&T syntax, all arguments on the stack. Argument roles are
// read off how each register is used below:
//   arg1 -> edi  destination pointer
//   arg2 -> esi  source pointer
//   arg3 -> ecx  byte count
//         NOTE(review): must be non-zero. A zero count reaches the byte-wise
//         tail loop, whose decl/jnz would wrap. Confirm callers guarantee it.
//   arg4 -> edx  pointer to 32 bytes of key
//   arg5 -> ebx  pointer to 16 bytes of counter/nonce; dword 0 is the counter
//
// Inputs of 256 bytes or more are first processed four 64-byte blocks at a
// time (one block per 32-bit lane); anything left over, and any input
// shorter than 256 bytes, goes through the one-block path at L0061x.
//
// Frame (esp aligned down to 64 bytes):
//       0..255(%esp)  4x working state rows, addressed as -128..112(%ebx)
//     256..511(%esp)  4x input state rows,   addressed as -128..112(%ebp)
//     512(%esp)       caller's esp, restored at L009done
//     516(%esp)       key pointer        (only stored on the 4x path)
//     520(%esp)       counter pointer    (only stored on the 4x path)
// Row i of the state lives at displacement 16*i-128 from ebx / ebp.
// eax holds the address of Lssse3_data for the whole routine.
// ---------------------------------------------------------------------------
.globl _ChaCha20_ctr32_ssse3
.private_extern _ChaCha20_ctr32_ssse3
.align 4
_ChaCha20_ctr32_ssse3:
L_ChaCha20_ctr32_ssse3_begin:
    pushl %ebp
    pushl %ebx
    pushl %esi
    pushl %edi
    // Position-independent address of Lpic_point: call pushes it, pop reads it.
    call Lpic_point
Lpic_point:
    popl %eax
    movl 20(%esp),%edi
    movl 24(%esp),%esi
    movl 28(%esp),%ecx
    movl 32(%esp),%edx
    movl 36(%esp),%ebx
    // Build the 64-byte aligned frame and remember the original esp in it.
    movl %esp,%ebp
    subl $524,%esp
    andl $-64,%esp
    movl %ebp,512(%esp)
    leal Lssse3_data-Lpic_point(%eax),%eax
    // xmm3 = counter/nonce block (also consumed by the 1x path).
    movdqu (%ebx),%xmm3
    cmpl $256,%ecx
    jb L0061x
    movl %edx,516(%esp)
    movl %ebx,520(%esp)
    // Bias the count by -256 so the 4x loop can test the borrow after subl.
    subl $256,%ecx
    leal 384(%esp),%ebp
    // xmm7 = key dwords 0..3.
    movdqu (%edx),%xmm7
    // Broadcast each counter/nonce dword across a register (rows 12..15).
    pshufd $0,%xmm3,%xmm0
    pshufd $85,%xmm3,%xmm1
    pshufd $170,%xmm3,%xmm2
    pshufd $255,%xmm3,%xmm3
    // Per-lane counters: counter + {0,1,2,3}, then minus 4 because the outer
    // loop adds 4 before every use.
    paddd 48(%eax),%xmm0
    pshufd $0,%xmm7,%xmm4
    pshufd $85,%xmm7,%xmm5
    psubd 64(%eax),%xmm0
    pshufd $170,%xmm7,%xmm6
    pshufd $255,%xmm7,%xmm7
    movdqa %xmm0,64(%ebp)
    movdqa %xmm1,80(%ebp)
    movdqa %xmm2,96(%ebp)
    movdqa %xmm3,112(%ebp)
    // xmm3 = key dwords 4..7.
    movdqu 16(%edx),%xmm3
    // Rows 4..7: broadcast key dwords 0..3.
    movdqa %xmm4,-64(%ebp)
    movdqa %xmm5,-48(%ebp)
    movdqa %xmm6,-32(%ebp)
    movdqa %xmm7,-16(%ebp)
    // xmm7 = the four constant words.
    movdqa 32(%eax),%xmm7
    leal 128(%esp),%ebx
    pshufd $0,%xmm3,%xmm0
    pshufd $85,%xmm3,%xmm1
    pshufd $170,%xmm3,%xmm2
    pshufd $255,%xmm3,%xmm3
    pshufd $0,%xmm7,%xmm4
    pshufd $85,%xmm7,%xmm5
    pshufd $170,%xmm7,%xmm6
    pshufd $255,%xmm7,%xmm7
    // Rows 8..11: broadcast key dwords 4..7.
    movdqa %xmm0,(%ebp)
    movdqa %xmm1,16(%ebp)
    movdqa %xmm2,32(%ebp)
    movdqa %xmm3,48(%ebp)
    // Rows 0..3: broadcast constants.
    movdqa %xmm4,-128(%ebp)
    movdqa %xmm5,-112(%ebp)
    movdqa %xmm6,-96(%ebp)
    movdqa %xmm7,-80(%ebp)
    // Bias both data pointers by +128; the 4x output code addresses the four
    // blocks as -128, -64, 0 and 64 from them.
    leal 128(%esi),%esi
    leal 128(%edi),%edi
    jmp L007outer_loop
// 4x path, per-batch setup: copy the input rows (ebp-relative) into the
// working area (ebx-relative), advance the four lane counters by 4, and load
// the rows needed by the first quarter-round into registers.
// Rows 0, 4, 8 and 9 are not copied because they are loaded straight into
// xmm0, xmm3, xmm4 and xmm5; row 12 (the counters) is carried in xmm6.
// edx = 10 loop iterations on exit.
.align 4,0x90
L007outer_loop:
    movdqa -112(%ebp),%xmm1
    movdqa -96(%ebp),%xmm2
    movdqa -80(%ebp),%xmm3
    movdqa -48(%ebp),%xmm5
    movdqa -32(%ebp),%xmm6
    movdqa -16(%ebp),%xmm7
    movdqa %xmm1,-112(%ebx)
    movdqa %xmm2,-96(%ebx)
    movdqa %xmm3,-80(%ebx)
    movdqa %xmm5,-48(%ebx)
    movdqa %xmm6,-32(%ebx)
    movdqa %xmm7,-16(%ebx)
    movdqa 32(%ebp),%xmm2
    movdqa 48(%ebp),%xmm3
    movdqa 64(%ebp),%xmm4
    movdqa 80(%ebp),%xmm5
    movdqa 96(%ebp),%xmm6
    movdqa 112(%ebp),%xmm7
    // Advance every lane counter by 4 (64(%eax) = {4,4,4,4}).
    paddd 64(%eax),%xmm4
    movdqa %xmm2,32(%ebx)
    movdqa %xmm3,48(%ebx)
    movdqa %xmm4,64(%ebx)
    movdqa %xmm5,80(%ebx)
    movdqa %xmm6,96(%ebx)
    movdqa %xmm7,112(%ebx)
    // Keep the advanced counters as the input row for the feed-forward and
    // for the next batch.
    movdqa %xmm4,64(%ebp)
    movdqa -128(%ebp),%xmm0
    movdqa %xmm4,%xmm6
    movdqa -64(%ebp),%xmm3
    movdqa (%ebp),%xmm4
    movdqa 16(%ebp),%xmm5
    movl $10,%edx
    nop
// 4x round loop: each iteration performs four column quarter-rounds followed
// by four diagonal quarter-rounds on whole rows, i.e. on four blocks at once
// (one block per 32-bit lane). edx counts iterations.
// Rotations by 16 and by 8 use pshufb with the byte masks at (%eax) and
// 16(%eax); rotations by 12 and 7 use a pslld/psrld/por triple.
// Loads and stores for the next quarter-round are interleaved with the
// current one, so the markers below show where each one starts. Row i is at
// displacement 16*i-128 from ebx.
.align 4,0x90
L008loop:
    // column: rows (0, 4, 8, 12)
    paddd %xmm3,%xmm0
    movdqa %xmm3,%xmm2
    pxor %xmm0,%xmm6
    pshufb (%eax),%xmm6
    paddd %xmm6,%xmm4
    pxor %xmm4,%xmm2
    movdqa -48(%ebx),%xmm3
    movdqa %xmm2,%xmm1
    pslld $12,%xmm2
    psrld $20,%xmm1
    por %xmm1,%xmm2
    movdqa -112(%ebx),%xmm1
    paddd %xmm2,%xmm0
    movdqa 80(%ebx),%xmm7
    pxor %xmm0,%xmm6
    movdqa %xmm0,-128(%ebx)
    pshufb 16(%eax),%xmm6
    paddd %xmm6,%xmm4
    movdqa %xmm6,64(%ebx)
    pxor %xmm4,%xmm2
    // column: rows (1, 5, 9, 13)
    paddd %xmm3,%xmm1
    movdqa %xmm2,%xmm0
    pslld $7,%xmm2
    psrld $25,%xmm0
    pxor %xmm1,%xmm7
    por %xmm0,%xmm2
    movdqa %xmm4,(%ebx)
    pshufb (%eax),%xmm7
    movdqa %xmm2,-64(%ebx)
    paddd %xmm7,%xmm5
    movdqa 32(%ebx),%xmm4
    pxor %xmm5,%xmm3
    movdqa -32(%ebx),%xmm2
    movdqa %xmm3,%xmm0
    pslld $12,%xmm3
    psrld $20,%xmm0
    por %xmm0,%xmm3
    movdqa -96(%ebx),%xmm0
    paddd %xmm3,%xmm1
    movdqa 96(%ebx),%xmm6
    pxor %xmm1,%xmm7
    movdqa %xmm1,-112(%ebx)
    pshufb 16(%eax),%xmm7
    paddd %xmm7,%xmm5
    movdqa %xmm7,80(%ebx)
    pxor %xmm5,%xmm3
    // column: rows (2, 6, 10, 14)
    paddd %xmm2,%xmm0
    movdqa %xmm3,%xmm1
    pslld $7,%xmm3
    psrld $25,%xmm1
    pxor %xmm0,%xmm6
    por %xmm1,%xmm3
    movdqa %xmm5,16(%ebx)
    pshufb (%eax),%xmm6
    movdqa %xmm3,-48(%ebx)
    paddd %xmm6,%xmm4
    movdqa 48(%ebx),%xmm5
    pxor %xmm4,%xmm2
    movdqa -16(%ebx),%xmm3
    movdqa %xmm2,%xmm1
    pslld $12,%xmm2
    psrld $20,%xmm1
    por %xmm1,%xmm2
    movdqa -80(%ebx),%xmm1
    paddd %xmm2,%xmm0
    movdqa 112(%ebx),%xmm7
    pxor %xmm0,%xmm6
    movdqa %xmm0,-96(%ebx)
    pshufb 16(%eax),%xmm6
    paddd %xmm6,%xmm4
    movdqa %xmm6,96(%ebx)
    pxor %xmm4,%xmm2
    // column: rows (3, 7, 11, 15)
    paddd %xmm3,%xmm1
    movdqa %xmm2,%xmm0
    pslld $7,%xmm2
    psrld $25,%xmm0
    pxor %xmm1,%xmm7
    por %xmm0,%xmm2
    pshufb (%eax),%xmm7
    movdqa %xmm2,-32(%ebx)
    paddd %xmm7,%xmm5
    pxor %xmm5,%xmm3
    movdqa -48(%ebx),%xmm2
    movdqa %xmm3,%xmm0
    pslld $12,%xmm3
    psrld $20,%xmm0
    por %xmm0,%xmm3
    movdqa -128(%ebx),%xmm0
    paddd %xmm3,%xmm1
    pxor %xmm1,%xmm7
    movdqa %xmm1,-80(%ebx)
    pshufb 16(%eax),%xmm7
    paddd %xmm7,%xmm5
    movdqa %xmm7,%xmm6
    pxor %xmm5,%xmm3
    // diagonal: rows (0, 5, 10, 15)
    paddd %xmm2,%xmm0
    movdqa %xmm3,%xmm1
    pslld $7,%xmm3
    psrld $25,%xmm1
    pxor %xmm0,%xmm6
    por %xmm1,%xmm3
    pshufb (%eax),%xmm6
    movdqa %xmm3,-16(%ebx)
    paddd %xmm6,%xmm4
    pxor %xmm4,%xmm2
    movdqa -32(%ebx),%xmm3
    movdqa %xmm2,%xmm1
    pslld $12,%xmm2
    psrld $20,%xmm1
    por %xmm1,%xmm2
    movdqa -112(%ebx),%xmm1
    paddd %xmm2,%xmm0
    movdqa 64(%ebx),%xmm7
    pxor %xmm0,%xmm6
    movdqa %xmm0,-128(%ebx)
    pshufb 16(%eax),%xmm6
    paddd %xmm6,%xmm4
    movdqa %xmm6,112(%ebx)
    pxor %xmm4,%xmm2
    // diagonal: rows (1, 6, 11, 12)
    paddd %xmm3,%xmm1
    movdqa %xmm2,%xmm0
    pslld $7,%xmm2
    psrld $25,%xmm0
    pxor %xmm1,%xmm7
    por %xmm0,%xmm2
    movdqa %xmm4,32(%ebx)
    pshufb (%eax),%xmm7
    movdqa %xmm2,-48(%ebx)
    paddd %xmm7,%xmm5
    movdqa (%ebx),%xmm4
    pxor %xmm5,%xmm3
    movdqa -16(%ebx),%xmm2
    movdqa %xmm3,%xmm0
    pslld $12,%xmm3
    psrld $20,%xmm0
    por %xmm0,%xmm3
    movdqa -96(%ebx),%xmm0
    paddd %xmm3,%xmm1
    movdqa 80(%ebx),%xmm6
    pxor %xmm1,%xmm7
    movdqa %xmm1,-112(%ebx)
    pshufb 16(%eax),%xmm7
    paddd %xmm7,%xmm5
    movdqa %xmm7,64(%ebx)
    pxor %xmm5,%xmm3
    // diagonal: rows (2, 7, 8, 13)
    paddd %xmm2,%xmm0
    movdqa %xmm3,%xmm1
    pslld $7,%xmm3
    psrld $25,%xmm1
    pxor %xmm0,%xmm6
    por %xmm1,%xmm3
    movdqa %xmm5,48(%ebx)
    pshufb (%eax),%xmm6
    movdqa %xmm3,-32(%ebx)
    paddd %xmm6,%xmm4
    movdqa 16(%ebx),%xmm5
    pxor %xmm4,%xmm2
    movdqa -64(%ebx),%xmm3
    movdqa %xmm2,%xmm1
    pslld $12,%xmm2
    psrld $20,%xmm1
    por %xmm1,%xmm2
    movdqa -80(%ebx),%xmm1
    paddd %xmm2,%xmm0
    movdqa 96(%ebx),%xmm7
    pxor %xmm0,%xmm6
    movdqa %xmm0,-96(%ebx)
    pshufb 16(%eax),%xmm6
    paddd %xmm6,%xmm4
    movdqa %xmm6,80(%ebx)
    pxor %xmm4,%xmm2
    // diagonal: rows (3, 4, 9, 14)
    paddd %xmm3,%xmm1
    movdqa %xmm2,%xmm0
    pslld $7,%xmm2
    psrld $25,%xmm0
    pxor %xmm1,%xmm7
    por %xmm0,%xmm2
    pshufb (%eax),%xmm7
    movdqa %xmm2,-16(%ebx)
    paddd %xmm7,%xmm5
    pxor %xmm5,%xmm3
    movdqa %xmm3,%xmm0
    pslld $12,%xmm3
    psrld $20,%xmm0
    por %xmm0,%xmm3
    // Reload rows 0 and 12 for the next iteration's first quarter-round.
    movdqa -128(%ebx),%xmm0
    paddd %xmm3,%xmm1
    movdqa 64(%ebx),%xmm6
    pxor %xmm1,%xmm7
    movdqa %xmm1,-80(%ebx)
    pshufb 16(%eax),%xmm7
    paddd %xmm7,%xmm5
    movdqa %xmm7,96(%ebx)
    pxor %xmm5,%xmm3
    movdqa %xmm3,%xmm1
    pslld $7,%xmm3
    psrld $25,%xmm1
    por %xmm1,%xmm3
    decl %edx
    jnz L008loop
// 4x path, after the rounds. For each group of four rows (0..3, 4..7, 8..11,
// 12..15): add the input rows back in, transpose the 4x4 dword matrix so
// each register holds 16 consecutive keystream bytes of one block, XOR with
// the source and store. esi/edi are biased by +128, so displacements -128,
// -64, 0 and 64 select the same 16-byte column of the four blocks.
// The pointers advance 16 + 16 + 16 + 208 = 256 bytes per batch.
// Afterwards either loop for another batch, finish, or prepare the counter
// block for the 1x path and fall through into L0061x.
    // Flush the rows still held in registers to the working area.
    movdqa %xmm3,-64(%ebx)
    movdqa %xmm4,(%ebx)
    movdqa %xmm5,16(%ebx)
    movdqa %xmm6,64(%ebx)
    movdqa %xmm7,96(%ebx)
    // Rows 0..3.
    movdqa -112(%ebx),%xmm1
    movdqa -96(%ebx),%xmm2
    movdqa -80(%ebx),%xmm3
    paddd -128(%ebp),%xmm0
    paddd -112(%ebp),%xmm1
    paddd -96(%ebp),%xmm2
    paddd -80(%ebp),%xmm3
    movdqa %xmm0,%xmm6
    punpckldq %xmm1,%xmm0
    movdqa %xmm2,%xmm7
    punpckldq %xmm3,%xmm2
    punpckhdq %xmm1,%xmm6
    punpckhdq %xmm3,%xmm7
    movdqa %xmm0,%xmm1
    punpcklqdq %xmm2,%xmm0
    movdqa %xmm6,%xmm3
    punpcklqdq %xmm7,%xmm6
    punpckhqdq %xmm2,%xmm1
    punpckhqdq %xmm7,%xmm3
    movdqu -128(%esi),%xmm4
    movdqu -64(%esi),%xmm5
    movdqu (%esi),%xmm2
    movdqu 64(%esi),%xmm7
    leal 16(%esi),%esi
    pxor %xmm0,%xmm4
    movdqa -64(%ebx),%xmm0
    pxor %xmm1,%xmm5
    movdqa -48(%ebx),%xmm1
    pxor %xmm2,%xmm6
    movdqa -32(%ebx),%xmm2
    pxor %xmm3,%xmm7
    movdqa -16(%ebx),%xmm3
    movdqu %xmm4,-128(%edi)
    movdqu %xmm5,-64(%edi)
    movdqu %xmm6,(%edi)
    movdqu %xmm7,64(%edi)
    leal 16(%edi),%edi
    // Rows 4..7.
    paddd -64(%ebp),%xmm0
    paddd -48(%ebp),%xmm1
    paddd -32(%ebp),%xmm2
    paddd -16(%ebp),%xmm3
    movdqa %xmm0,%xmm6
    punpckldq %xmm1,%xmm0
    movdqa %xmm2,%xmm7
    punpckldq %xmm3,%xmm2
    punpckhdq %xmm1,%xmm6
    punpckhdq %xmm3,%xmm7
    movdqa %xmm0,%xmm1
    punpcklqdq %xmm2,%xmm0
    movdqa %xmm6,%xmm3
    punpcklqdq %xmm7,%xmm6
    punpckhqdq %xmm2,%xmm1
    punpckhqdq %xmm7,%xmm3
    movdqu -128(%esi),%xmm4
    movdqu -64(%esi),%xmm5
    movdqu (%esi),%xmm2
    movdqu 64(%esi),%xmm7
    leal 16(%esi),%esi
    pxor %xmm0,%xmm4
    movdqa (%ebx),%xmm0
    pxor %xmm1,%xmm5
    movdqa 16(%ebx),%xmm1
    pxor %xmm2,%xmm6
    movdqa 32(%ebx),%xmm2
    pxor %xmm3,%xmm7
    movdqa 48(%ebx),%xmm3
    movdqu %xmm4,-128(%edi)
    movdqu %xmm5,-64(%edi)
    movdqu %xmm6,(%edi)
    movdqu %xmm7,64(%edi)
    leal 16(%edi),%edi
    // Rows 8..11.
    paddd (%ebp),%xmm0
    paddd 16(%ebp),%xmm1
    paddd 32(%ebp),%xmm2
    paddd 48(%ebp),%xmm3
    movdqa %xmm0,%xmm6
    punpckldq %xmm1,%xmm0
    movdqa %xmm2,%xmm7
    punpckldq %xmm3,%xmm2
    punpckhdq %xmm1,%xmm6
    punpckhdq %xmm3,%xmm7
    movdqa %xmm0,%xmm1
    punpcklqdq %xmm2,%xmm0
    movdqa %xmm6,%xmm3
    punpcklqdq %xmm7,%xmm6
    punpckhqdq %xmm2,%xmm1
    punpckhqdq %xmm7,%xmm3
    movdqu -128(%esi),%xmm4
    movdqu -64(%esi),%xmm5
    movdqu (%esi),%xmm2
    movdqu 64(%esi),%xmm7
    leal 16(%esi),%esi
    pxor %xmm0,%xmm4
    movdqa 64(%ebx),%xmm0
    pxor %xmm1,%xmm5
    movdqa 80(%ebx),%xmm1
    pxor %xmm2,%xmm6
    movdqa 96(%ebx),%xmm2
    pxor %xmm3,%xmm7
    movdqa 112(%ebx),%xmm3
    movdqu %xmm4,-128(%edi)
    movdqu %xmm5,-64(%edi)
    movdqu %xmm6,(%edi)
    movdqu %xmm7,64(%edi)
    leal 16(%edi),%edi
    // Rows 12..15.
    paddd 64(%ebp),%xmm0
    paddd 80(%ebp),%xmm1
    paddd 96(%ebp),%xmm2
    paddd 112(%ebp),%xmm3
    movdqa %xmm0,%xmm6
    punpckldq %xmm1,%xmm0
    movdqa %xmm2,%xmm7
    punpckldq %xmm3,%xmm2
    punpckhdq %xmm1,%xmm6
    punpckhdq %xmm3,%xmm7
    movdqa %xmm0,%xmm1
    punpcklqdq %xmm2,%xmm0
    movdqa %xmm6,%xmm3
    punpcklqdq %xmm7,%xmm6
    punpckhqdq %xmm2,%xmm1
    punpckhqdq %xmm7,%xmm3
    movdqu -128(%esi),%xmm4
    movdqu -64(%esi),%xmm5
    movdqu (%esi),%xmm2
    movdqu 64(%esi),%xmm7
    // 3*16 + 208 = 256: step over the batch just consumed.
    leal 208(%esi),%esi
    pxor %xmm0,%xmm4
    pxor %xmm1,%xmm5
    pxor %xmm2,%xmm6
    pxor %xmm3,%xmm7
    movdqu %xmm4,-128(%edi)
    movdqu %xmm5,-64(%edi)
    movdqu %xmm6,(%edi)
    movdqu %xmm7,64(%edi)
    leal 208(%edi),%edi
    // ecx is biased by -256: no borrow means another full batch remains.
    subl $256,%ecx
    jnc L007outer_loop
    // Undo the bias; ecx = bytes left (0..255).
    addl $256,%ecx
    jz L009done
    // Hand over to the 1x path: restore the counter and key pointers and
    // remove the +128 bias from the data pointers.
    movl 520(%esp),%ebx
    leal -128(%esi),%esi
    movl 516(%esp),%edx
    leal -128(%edi),%edi
    // xmm2 = lane-0 counter of the last batch, plus 4 = next unused counter.
    movd 64(%ebp),%xmm2
    movdqu (%ebx),%xmm3
    paddd 96(%eax),%xmm2
    // Keep dwords 1..3 of the caller's counter block, replace dword 0.
    pand 112(%eax),%xmm3
    por %xmm2,%xmm3
// 1x path setup (one 64-byte block per pass, whole state in xmm0..xmm3).
// L0061x is entered with xmm3 = counter/nonce block, edx = key pointer and
// eax = Lssse3_data. It saves the input state at 0..63(%esp) and loads the
// two pshufb masks into xmm6 (rotate by 16) and xmm7 (rotate by 8).
// L011outer1x is the re-entry point for each further block: it reloads the
// saved state and bumps the counter by one.
L0061x:
    movdqa 32(%eax),%xmm0
    movdqu (%edx),%xmm1
    movdqu 16(%edx),%xmm2
    movdqa (%eax),%xmm6
    movdqa 16(%eax),%xmm7
    // NOTE(review): this dword is overwritten by the 16-byte store of xmm3
    // to 48(%esp) four instructions below, so it appears to be a dead store.
    movl %ebp,48(%esp)
    movdqa %xmm0,(%esp)
    movdqa %xmm1,16(%esp)
    movdqa %xmm2,32(%esp)
    movdqa %xmm3,48(%esp)
    movl $10,%edx
    jmp L010loop1x
.align 4,0x90
L011outer1x:
    // xmm3 = saved counter block + {1,0,0,0}; written back for the next pass.
    movdqa 80(%eax),%xmm3
    movdqa (%esp),%xmm0
    movdqa 16(%esp),%xmm1
    movdqa 32(%esp),%xmm2
    paddd 48(%esp),%xmm3
    movl $10,%edx
    movdqa %xmm3,48(%esp)
    jmp L010loop1x
// 1x round loop: xmm0..xmm3 hold state rows a, b, c, d of a single block, so
// one pass over the quarter-round sequence processes all four columns at
// once. Each iteration does a column round, rotates rows b/c/d with pshufd
// so the diagonals line up as columns, does a second round, then rotates the
// rows back. edx counts iterations.
// The .byte sequences are hand-encoded pshufb instructions:
//     102,15,56,0,222 = pshufb %xmm6,%xmm3   (rotate each dword by 16)
//     102,15,56,0,223 = pshufb %xmm7,%xmm3   (rotate each dword left by 8)
.align 4,0x90
L010loop1x:
    // Column round.
    paddd %xmm1,%xmm0
    pxor %xmm0,%xmm3
.byte 102,15,56,0,222
    paddd %xmm3,%xmm2
    pxor %xmm2,%xmm1
    movdqa %xmm1,%xmm4
    psrld $20,%xmm1
    pslld $12,%xmm4
    por %xmm4,%xmm1
    paddd %xmm1,%xmm0
    pxor %xmm0,%xmm3
.byte 102,15,56,0,223
    paddd %xmm3,%xmm2
    pxor %xmm2,%xmm1
    movdqa %xmm1,%xmm4
    psrld $25,%xmm1
    pslld $7,%xmm4
    por %xmm4,%xmm1
    // Rotate rows c, b, d by 2, 1 and 3 dword positions.
    pshufd $78,%xmm2,%xmm2
    pshufd $57,%xmm1,%xmm1
    pshufd $147,%xmm3,%xmm3
    nop
    // Diagonal round.
    paddd %xmm1,%xmm0
    pxor %xmm0,%xmm3
.byte 102,15,56,0,222
    paddd %xmm3,%xmm2
    pxor %xmm2,%xmm1
    movdqa %xmm1,%xmm4
    psrld $20,%xmm1
    pslld $12,%xmm4
    por %xmm4,%xmm1
    paddd %xmm1,%xmm0
    pxor %xmm0,%xmm3
.byte 102,15,56,0,223
    paddd %xmm3,%xmm2
    pxor %xmm2,%xmm1
    movdqa %xmm1,%xmm4
    psrld $25,%xmm1
    pslld $7,%xmm4
    por %xmm4,%xmm1
    // Undo the row rotation (b and d use the opposite shuffles this time).
    pshufd $78,%xmm2,%xmm2
    pshufd $147,%xmm1,%xmm1
    pshufd $57,%xmm3,%xmm3
    decl %edx
    jnz L010loop1x
// 1x path, after the rounds: add the saved input state (feed-forward), then
// either XOR a whole 64-byte block with the source and loop, or, for fewer
// than 64 remaining bytes, spill the keystream to the stack and XOR it in
// byte by byte. L009done is the common epilogue for the whole routine.
// ecx = remaining byte count, esi = source, edi = destination.
    paddd (%esp),%xmm0
    paddd 16(%esp),%xmm1
    paddd 32(%esp),%xmm2
    paddd 48(%esp),%xmm3
    cmpl $64,%ecx
    jb L012tail
    movdqu (%esi),%xmm4
    movdqu 16(%esi),%xmm5
    pxor %xmm4,%xmm0
    movdqu 32(%esi),%xmm4
    pxor %xmm5,%xmm1
    movdqu 48(%esi),%xmm5
    pxor %xmm4,%xmm2
    pxor %xmm5,%xmm3
    leal 64(%esi),%esi
    movdqu %xmm0,(%edi)
    movdqu %xmm1,16(%edi)
    movdqu %xmm2,32(%edi)
    movdqu %xmm3,48(%edi)
    leal 64(%edi),%edi
    subl $64,%ecx
    jnz L011outer1x
    jmp L009done
L012tail:
    // Keystream block to 0..63(%esp); the saved input state there is no
    // longer needed because this is the last block.
    movdqa %xmm0,(%esp)
    movdqa %xmm1,16(%esp)
    movdqa %xmm2,32(%esp)
    movdqa %xmm3,48(%esp)
    xorl %eax,%eax
    xorl %edx,%edx
    // ebp = byte index.
    xorl %ebp,%ebp
L013tail_loop:
    // al = keystream byte, dl = source byte.
    movb (%esp,%ebp,1),%al
    movb (%esi,%ebp,1),%dl
    leal 1(%ebp),%ebp
    xorb %dl,%al
    // ebp was already incremented, hence the -1 displacement.
    movb %al,-1(%edi,%ebp,1)
    decl %ecx
    jnz L013tail_loop
L009done:
    // Restore the caller's esp saved in the aligned frame, then the
    // registers pushed at entry.
    movl 512(%esp),%esp
    popl %edi
    popl %esi
    popl %ebx
    popl %ebp
    ret
// Constant table for _ChaCha20_ctr32_ssse3, addressed relative to eax.
// It is read with movdqa / aligned memory operands, so it must stay at least
// 16-byte aligned.
//   offset   0  pshufb mask: rotate every dword by 16 bits
//   offset  16  pshufb mask: rotate every dword left by 8 bits
//   offset  32  the four ChaCha constant words (ASCII "expand 32-byte k")
//   offset  48  {0,1,2,3}     per-lane counter offsets for the 4x path
//   offset  64  {4,4,4,4}     counter step per 4x batch
//   offset  80  {1,0,0,0}     counter step per 1x block
//   offset  96  {4,0,0,0}     counter step when leaving the 4x path
//   offset 112  {0,-1,-1,-1}  mask keeping dwords 1..3 of the counter block
.align 6,0x90
Lssse3_data:
.byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
.byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
.long 1634760805,857760878,2036477234,1797285236
.long 0,1,2,3
.long 4,4,4,4
.long 1,0,0,0
.long 4,0,0,0
.long 0,-1,-1,-1
.align 6,0x90
// NUL-terminated ASCII: "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>"
.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
.byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte 114,103,62,0
#endif  // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)
957#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)