blob: 5aed8df15f41b6b7dd25e55c1a24e853225c16ba [file] [log] [blame]
#!/usr/bin/env perl
# Copyright (c) 2023, Google Inc.
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
use strict;
use warnings;

# Parse the standard perlasm arguments: a target "flavour" and an output
# path. When invoked with a single argument containing a '.', it is the
# output file and no flavour was given.
my $flavour = shift;
my $output = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# Locate arm-xlate.pl next to this script, falling back to the shared
# perlasm directory.
$0 =~ m/(.*[\/\\])[^\/\\]+$/;
my $dir = $1;
my $xlate;
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../../perlasm/arm-xlate.pl" and -f $xlate) or
die "can't locate arm-xlate.pl";

# Pipe everything we print through the translator, which rewrites the
# perlasm into assembly for the requested flavour. $flavour may be undef
# (it previously interpolated as the empty string); keep that behavior
# without an uninitialized-value warning.
my $xlate_args = defined($flavour) ? $flavour : "";
open OUT, "| \"$^X\" \"$xlate\" $xlate_args \"$output\""
    or die "can't call $xlate: $!";
*STDOUT = *OUT;

# Register assignments. Arguments arrive in x0-x3 per the AAPCS64 calling
# convention; x4-x8 are caller-clobbered temporaries.
my ($rp, $ap, $bp, $num) = ("x0", "x1", "x2", "x3");
my ($a0, $a1, $b0, $b1, $num_pairs) = ("x4", "x5", "x6", "x7", "x8");
# The assembly template. The heredoc interpolates the register names bound
# above; everything else is emitted verbatim through arm-xlate.pl.
my $code = <<____;
#include <openssl/arm_arch.h>
.text
// BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
//                       size_t num);
.type bn_add_words, %function
.globl bn_add_words
.align 4
bn_add_words:
	AARCH64_VALID_CALL_TARGET
	# Clear the carry flag, so the first ADCS behaves as a plain ADDS.
	cmn xzr, xzr

	# aarch64 can load two registers at a time, so we do two loop iterations
	# at a time. Split $num into 2 * $num_pairs plus a remainder. This allows
	# loop operations to use CBNZ without clobbering the carry flag.
	lsr $num_pairs, $num, #1
	and $num, $num, #1

	cbz $num_pairs, .Ladd_tail
.Ladd_loop:
	ldp $a0, $a1, [$ap], #16
	ldp $b0, $b1, [$bp], #16
	sub $num_pairs, $num_pairs, #1
	adcs $a0, $a0, $b0
	adcs $a1, $a1, $b1
	stp $a0, $a1, [$rp], #16
	cbnz $num_pairs, .Ladd_loop

.Ladd_tail:
	cbz $num, .Ladd_exit
	ldr $a0, [$ap], #8
	ldr $b0, [$bp], #8
	adcs $a0, $a0, $b0
	str $a0, [$rp], #8

.Ladd_exit:
	# Return the final carry.
	cset x0, cs
	ret
.size bn_add_words,.-bn_add_words

// BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
//                       size_t num);
.type bn_sub_words, %function
.globl bn_sub_words
.align 4
bn_sub_words:
	AARCH64_VALID_CALL_TARGET
	# Set the carry flag. Arm's borrow bit is flipped from the carry flag,
	# so we want C = 1 here.
	cmp xzr, xzr

	# aarch64 can load two registers at a time, so we do two loop iterations
	# at a time. Split $num into 2 * $num_pairs plus a remainder. This allows
	# loop operations to use CBNZ without clobbering the carry flag.
	lsr $num_pairs, $num, #1
	and $num, $num, #1

	cbz $num_pairs, .Lsub_tail
.Lsub_loop:
	ldp $a0, $a1, [$ap], #16
	ldp $b0, $b1, [$bp], #16
	sub $num_pairs, $num_pairs, #1
	sbcs $a0, $a0, $b0
	sbcs $a1, $a1, $b1
	stp $a0, $a1, [$rp], #16
	cbnz $num_pairs, .Lsub_loop

.Lsub_tail:
	cbz $num, .Lsub_exit
	ldr $a0, [$ap], #8
	ldr $b0, [$bp], #8
	sbcs $a0, $a0, $b0
	str $a0, [$rp], #8

.Lsub_exit:
	# Return the final borrow (C clear means a borrow occurred).
	cset x0, cc
	ret
# BUG FIX: the directive was previously emitted as "size" (missing the
# leading '.'), which is not a valid assembler directive.
.size bn_sub_words,.-bn_sub_words
____

print $code;
close STDOUT or die "error closing STDOUT: $!";