Add ABI tests for x86_64-mont5.pl.
Fix some missing CFI bits.
Change-Id: I42114527f0ef8e03079d37a9f466d64a63a313f5
Reviewed-on: https://boringssl-review.googlesource.com/c/34864
Commit-Queue: David Benjamin <davidben@google.com>
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/fipsmodule/bn/asm/x86_64-mont5.pl b/crypto/fipsmodule/bn/asm/x86_64-mont5.pl
index 442e696..abcfe6a 100755
--- a/crypto/fipsmodule/bn/asm/x86_64-mont5.pl
+++ b/crypto/fipsmodule/bn/asm/x86_64-mont5.pl
@@ -566,6 +566,7 @@
.type mul4x_internal,\@abi-omnipotent
.align 32
mul4x_internal:
+.cfi_startproc
shl \$5,$num # $num was in bytes
movd `($win64?56:8)`(%rax),%xmm5 # load 7th argument, index
lea .Linc(%rip),%rax
@@ -1060,6 +1061,7 @@
___
}
$code.=<<___;
+.cfi_endproc
.size mul4x_internal,.-mul4x_internal
___
}}}
@@ -1226,6 +1228,7 @@
.align 32
bn_sqr8x_internal:
__bn_sqr8x_internal:
+.cfi_startproc
##############################################################
# Squaring part:
#
@@ -2017,6 +2020,7 @@
cmp %rdx,$tptr # end of t[]?
jb .L8x_reduction_loop
ret
+.cfi_endproc
.size bn_sqr8x_internal,.-bn_sqr8x_internal
___
}
@@ -2029,6 +2033,7 @@
.type __bn_post4x_internal,\@abi-omnipotent
.align 32
__bn_post4x_internal:
+.cfi_startproc
mov 8*0($nptr),%r12
lea (%rdi,$num),$tptr # %rdi was $tptr above
mov $num,%rcx
@@ -2079,6 +2084,7 @@
mov $num,%r10 # prepare for back-to-back call
neg $num # restore $num
ret
+.cfi_endproc
.size __bn_post4x_internal,.-__bn_post4x_internal
___
}
@@ -2088,10 +2094,12 @@
.type bn_from_montgomery,\@abi-omnipotent
.align 32
bn_from_montgomery:
+.cfi_startproc
testl \$7,`($win64?"48(%rsp)":"%r9d")`
jz bn_from_mont8x
xor %eax,%eax
ret
+.cfi_endproc
.size bn_from_montgomery,.-bn_from_montgomery
.type bn_from_mont8x,\@function,6
@@ -2388,6 +2396,7 @@
.type mulx4x_internal,\@abi-omnipotent
.align 32
mulx4x_internal:
+.cfi_startproc
mov $num,8(%rsp) # save -$num (it was in bytes)
mov $num,%r10
neg $num # restore $num
@@ -2738,6 +2747,7 @@
mov 8*2(%rbp),%r14
mov 8*3(%rbp),%r15
jmp .Lsqrx4x_sub_entry # common post-condition
+.cfi_endproc
.size mulx4x_internal,.-mulx4x_internal
___
}{
@@ -3542,7 +3552,9 @@
my ($rptr,$nptr)=("%rdx","%rbp");
$code.=<<___;
.align 32
+.type __bn_postx4x_internal,\@abi-omnipotent
__bn_postx4x_internal:
+.cfi_startproc
mov 8*0($nptr),%r12
mov %rcx,%r10 # -$num
mov %rcx,%r9 # -$num
@@ -3590,6 +3602,7 @@
neg %r9 # restore $num
ret
+.cfi_endproc
.size __bn_postx4x_internal,.-__bn_postx4x_internal
___
}
@@ -3606,6 +3619,7 @@
.type bn_scatter5,\@abi-omnipotent
.align 16
bn_scatter5:
+.cfi_startproc
cmp \$0, $num
jz .Lscatter_epilogue
lea ($tbl,$idx,8),$tbl
@@ -3618,15 +3632,18 @@
jnz .Lscatter
.Lscatter_epilogue:
ret
+.cfi_endproc
.size bn_scatter5,.-bn_scatter5
.globl bn_gather5
.type bn_gather5,\@abi-omnipotent
.align 32
bn_gather5:
+.cfi_startproc
.LSEH_begin_bn_gather5: # Win64 thing, but harmless in other cases
# I can't trust assembler to use specific encoding:-(
.byte 0x4c,0x8d,0x14,0x24 #lea (%rsp),%r10
+.cfi_def_cfa_register %r10
.byte 0x48,0x81,0xec,0x08,0x01,0x00,0x00 #sub $0x108,%rsp
lea .Linc(%rip),%rax
and \$-16,%rsp # shouldn't be formally required
@@ -3707,8 +3724,10 @@
jnz .Lgather
lea (%r10),%rsp
+.cfi_def_cfa_register %rsp
ret
.LSEH_end_bn_gather5:
+.cfi_endproc
.size bn_gather5,.-bn_gather5
___
}
diff --git a/crypto/fipsmodule/bn/bn_test.cc b/crypto/fipsmodule/bn/bn_test.cc
index b7d98da..a61d6e1 100644
--- a/crypto/fipsmodule/bn/bn_test.cc
+++ b/crypto/fipsmodule/bn/bn_test.cc
@@ -2405,6 +2405,55 @@
}
#endif // OPENSSL_BN_ASM_MONT && SUPPORTS_ABI_TEST
+#if defined(OPENSSL_BN_ASM_MONT5) && defined(SUPPORTS_ABI_TEST)
+TEST_F(BNTest, BNMulMont5ABI) {
+ for (size_t words : {4, 5, 6, 7, 8, 16, 32}) {
+ SCOPED_TRACE(words);
+
+ bssl::UniquePtr<BIGNUM> m(BN_new());
+ ASSERT_TRUE(m);
+ ASSERT_TRUE(BN_set_bit(m.get(), 0));
+ ASSERT_TRUE(BN_set_bit(m.get(), words * BN_BITS2 - 1));
+ bssl::UniquePtr<BN_MONT_CTX> mont(
+ BN_MONT_CTX_new_for_modulus(m.get(), ctx()));
+ ASSERT_TRUE(mont);
+
+ std::vector<BN_ULONG> r(words), a(words), b(words), table(words * 32);
+ a[0] = 1;
+ b[0] = 42;
+
+ bn_mul_mont(r.data(), a.data(), b.data(), mont->N.d, mont->n0, words);
+ CHECK_ABI(bn_scatter5, r.data(), words, table.data(), 13);
+ for (size_t i = 0; i < 32; i++) {
+ bn_mul_mont(r.data(), a.data(), b.data(), mont->N.d, mont->n0, words);
+ bn_scatter5(r.data(), words, table.data(), i);
+ }
+ CHECK_ABI(bn_gather5, r.data(), words, table.data(), 13);
+
+ CHECK_ABI(bn_mul_mont_gather5, r.data(), r.data(), table.data(), m->d,
+ mont->n0, words, 13);
+ CHECK_ABI(bn_mul_mont_gather5, r.data(), a.data(), table.data(), m->d,
+ mont->n0, words, 13);
+
+ if (words % 8 == 0) {
+ CHECK_ABI(bn_power5, r.data(), r.data(), table.data(), m->d, mont->n0,
+ words, 13);
+ CHECK_ABI(bn_power5, r.data(), a.data(), table.data(), m->d, mont->n0,
+ words, 13);
+ EXPECT_EQ(1, CHECK_ABI(bn_from_montgomery, r.data(), r.data(), nullptr,
+ m->d, mont->n0, words));
+ EXPECT_EQ(1, CHECK_ABI(bn_from_montgomery, r.data(), a.data(), nullptr,
+ m->d, mont->n0, words));
+ } else {
+ EXPECT_EQ(0, CHECK_ABI(bn_from_montgomery, r.data(), r.data(), nullptr,
+ m->d, mont->n0, words));
+ EXPECT_EQ(0, CHECK_ABI(bn_from_montgomery, r.data(), a.data(), nullptr,
+ m->d, mont->n0, words));
+ }
+ }
+}
+#endif // OPENSSL_BN_ASM_MONT5 && SUPPORTS_ABI_TEST
+
#if defined(RSAZ_ENABLED) && defined(SUPPORTS_ABI_TEST)
TEST_F(BNTest, RSAZABI) {
if (!rsaz_avx2_capable()) {
diff --git a/crypto/fipsmodule/bn/exponentiation.c b/crypto/fipsmodule/bn/exponentiation.c
index 29e7877..9e40811 100644
--- a/crypto/fipsmodule/bn/exponentiation.c
+++ b/crypto/fipsmodule/bn/exponentiation.c
@@ -120,22 +120,6 @@
#include "rsaz_exp.h"
-#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64)
-#define OPENSSL_BN_ASM_MONT5
-
-void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap,
- const BN_ULONG *table, const BN_ULONG *np,
- const BN_ULONG *n0, int num, int power);
-void bn_scatter5(const BN_ULONG *inp, size_t num, BN_ULONG *table,
- size_t power);
-void bn_gather5(BN_ULONG *out, size_t num, BN_ULONG *table, size_t power);
-void bn_power5(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table,
- const BN_ULONG *np, const BN_ULONG *n0, int num, int power);
-int bn_from_montgomery(BN_ULONG *rp, const BN_ULONG *ap,
- const BN_ULONG *not_used, const BN_ULONG *np,
- const BN_ULONG *n0, int num);
-#endif
-
int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) {
int i, bits, ret = 0;
BIGNUM *v, *rr;
diff --git a/crypto/fipsmodule/bn/internal.h b/crypto/fipsmodule/bn/internal.h
index 752f0dc..c1e60fe 100644
--- a/crypto/fipsmodule/bn/internal.h
+++ b/crypto/fipsmodule/bn/internal.h
@@ -357,6 +357,44 @@
const BN_ULONG *np, const BN_ULONG *n0, size_t num);
#endif
+#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64)
+#define OPENSSL_BN_ASM_MONT5
+
+// bn_mul_mont_gather5 loads index |power| of |table|, multiplies it
+// by |ap| modulo |np|, and stores the result in |rp|. The values are |num|
+// words long and represented in Montgomery form. |n0| is a pointer to the
+// corresponding field in |BN_MONT_CTX|.
+void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap,
+ const BN_ULONG *table, const BN_ULONG *np,
+ const BN_ULONG *n0, int num, int power);
+
+// bn_scatter5 stores |inp| to index |power| of |table|. |inp| and each entry of
+// |table| are |num| words long. |power| must be less than 32. |table| must be
+// 32*|num| words long.
+void bn_scatter5(const BN_ULONG *inp, size_t num, BN_ULONG *table,
+ size_t power);
+
+// bn_gather5 loads index |power| of |table| and stores it in |out|. |out| and
+// each entry of |table| are |num| words long. |power| must be less than 32.
+void bn_gather5(BN_ULONG *out, size_t num, BN_ULONG *table, size_t power);
+
+// bn_power5 squares |ap| five times and multiplies it by the value stored at
+// index |power| of |table|, modulo |np|. It stores the result in |rp|. The
+// values are |num| words long and represented in Montgomery form. |n0| is a
+// pointer to the corresponding field in |BN_MONT_CTX|. |num| must be divisible
+// by 8.
+void bn_power5(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table,
+ const BN_ULONG *np, const BN_ULONG *n0, int num, int power);
+
+// bn_from_montgomery converts |ap| from Montgomery form modulo |np| and writes
+// the result in |rp|, each of which is |num| words long. It returns one on
+// success and zero if it cannot handle inputs of length |num|. |n0| is a
+// pointer to the corresponding field in |BN_MONT_CTX|.
+int bn_from_montgomery(BN_ULONG *rp, const BN_ULONG *ap,
+ const BN_ULONG *not_used, const BN_ULONG *np,
+ const BN_ULONG *n0, int num);
+#endif // !OPENSSL_NO_ASM && OPENSSL_X86_64
+
uint64_t bn_mont_n0(const BIGNUM *n);
// bn_mod_exp_base_2_consttime calculates r = 2**p (mod n). |p| must be larger