#!/usr/bin/env perl
# Copyright (c) 2023, Google Inc.
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

use strict;

# Usage: <script> [flavour] output
# Both values are forwarded to arm-xlate.pl on its command line.
my $flavour = shift;
my $output = shift;
# A first argument containing a dot is treated as the output file name, in
# which case no flavour was given.
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# Directory portion of this script's path (including the trailing separator),
# or the empty string when the script was invoked without a directory part.
# Binding the capture explicitly avoids depending on whatever $1 held before.
my $dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";

# Look for arm-xlate.pl next to this script first, then under
# ../../../perlasm relative to it.
my $xlate;
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../../perlasm/arm-xlate.pl" and -f $xlate) or
die "can't locate arm-xlate.pl";

# Pipe everything written to STDOUT through arm-xlate.pl, run with the same
# perl interpreter ($^X) that is running this script. Fail immediately if the
# pipe cannot be started instead of discovering it only when STDOUT is closed.
open OUT, "| \"$^X\" \"$xlate\" $flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT = *OUT;

# Registers holding the four arguments of the C prototypes quoted in the
# assembly below: result pointer, the two operand pointers, and the word count.
my ($rp, $ap, $bp, $num) = ("x0", "x1", "x2", "x3");
# Scratch registers: two words loaded from each operand, plus the counter for
# the two-words-per-iteration loop.
my ($a0, $a1, $b0, $b1, $num_pairs) = ("x4", "x5", "x6", "x7", "x8");

# Assembly text for bn_add_words and bn_sub_words. The heredoc interpolates
# the register names above, so "$num" etc. inside it (comments included) are
# replaced by register names in the emitted output.
my $code = <<____;
#include <openssl/arm_arch.h>

.text

// BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
//                       size_t num);
.type bn_add_words, %function
.globl bn_add_words
.align 4
bn_add_words:
	AARCH64_VALID_CALL_TARGET
	# Clear the carry flag.
	cmn xzr, xzr

	# aarch64 can load two registers at a time, so we do two loop iterations at
	# at a time. Split $num = 2 * $num_pairs + $num. This allows loop
	# operations to use CBNZ without clobbering the carry flag.
	lsr $num_pairs, $num, #1
	and $num, $num, #1

	cbz $num_pairs, .Ladd_tail
.Ladd_loop:
	ldp $a0, $a1, [$ap], #16
	ldp $b0, $b1, [$bp], #16
	sub $num_pairs, $num_pairs, #1
	adcs $a0, $a0, $b0
	adcs $a1, $a1, $b1
	stp $a0, $a1, [$rp], #16
	cbnz $num_pairs, .Ladd_loop

.Ladd_tail:
	cbz $num, .Ladd_exit
	ldr $a0, [$ap], #8
	ldr $b0, [$bp], #8
	adcs $a0, $a0, $b0
	str $a0, [$rp], #8

.Ladd_exit:
	cset x0, cs
	ret
.size bn_add_words,.-bn_add_words

// BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
//                       size_t num);
.type bn_sub_words, %function
.globl bn_sub_words
.align 4
bn_sub_words:
	AARCH64_VALID_CALL_TARGET
	# Set the carry flag. Arm's borrow bit is flipped from the carry flag,
	# so we want C = 1 here.
	cmp xzr, xzr

	# aarch64 can load two registers at a time, so we do two loop iterations at
	# at a time. Split $num = 2 * $num_pairs + $num. This allows loop
	# operations to use CBNZ without clobbering the carry flag.
	lsr $num_pairs, $num, #1
	and $num, $num, #1

	cbz $num_pairs, .Lsub_tail
.Lsub_loop:
	ldp $a0, $a1, [$ap], #16
	ldp $b0, $b1, [$bp], #16
	sub $num_pairs, $num_pairs, #1
	sbcs $a0, $a0, $b0
	sbcs $a1, $a1, $b1
	stp $a0, $a1, [$rp], #16
	cbnz $num_pairs, .Lsub_loop

.Lsub_tail:
	cbz $num, .Lsub_exit
	ldr $a0, [$ap], #8
	ldr $b0, [$bp], #8
	sbcs $a0, $a0, $b0
	str $a0, [$rp], #8

.Lsub_exit:
	cset x0, cc
	ret
.size bn_sub_words,.-bn_sub_words
____

# Write the generated assembly to STDOUT, which earlier in this script is
# aliased to the pipe feeding arm-xlate.pl. Closing the handle flushes the
# buffered output; a failed close is treated as a fatal error.
print STDOUT $code;
close(STDOUT) or die "error closing STDOUT: $!";