Add ABI tests for x86_64-mont5.pl.

Fix some missing CFI bits.

Change-Id: I42114527f0ef8e03079d37a9f466d64a63a313f5
Reviewed-on: https://boringssl-review.googlesource.com/c/34864
Commit-Queue: David Benjamin <davidben@google.com>
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/fipsmodule/bn/asm/x86_64-mont5.pl b/crypto/fipsmodule/bn/asm/x86_64-mont5.pl
index 442e696..abcfe6a 100755
--- a/crypto/fipsmodule/bn/asm/x86_64-mont5.pl
+++ b/crypto/fipsmodule/bn/asm/x86_64-mont5.pl
@@ -566,6 +566,7 @@
 .type	mul4x_internal,\@abi-omnipotent
 .align	32
 mul4x_internal:
+.cfi_startproc
 	shl	\$5,$num		# $num was in bytes
 	movd	`($win64?56:8)`(%rax),%xmm5	# load 7th argument, index
 	lea	.Linc(%rip),%rax
@@ -1060,6 +1061,7 @@
 ___
 }
 $code.=<<___;
+.cfi_endproc
 .size	mul4x_internal,.-mul4x_internal
 ___
 }}}
@@ -1226,6 +1228,7 @@
 .align	32
 bn_sqr8x_internal:
 __bn_sqr8x_internal:
+.cfi_startproc
 	##############################################################
 	# Squaring part:
 	#
@@ -2017,6 +2020,7 @@
 	cmp	%rdx,$tptr		# end of t[]?
 	jb	.L8x_reduction_loop
 	ret
+.cfi_endproc
 .size	bn_sqr8x_internal,.-bn_sqr8x_internal
 ___
 }
@@ -2029,6 +2033,7 @@
 .type	__bn_post4x_internal,\@abi-omnipotent
 .align	32
 __bn_post4x_internal:
+.cfi_startproc
 	mov	8*0($nptr),%r12
 	lea	(%rdi,$num),$tptr	# %rdi was $tptr above
 	mov	$num,%rcx
@@ -2079,6 +2084,7 @@
 	mov	$num,%r10		# prepare for back-to-back call
 	neg	$num			# restore $num
 	ret
+.cfi_endproc
 .size	__bn_post4x_internal,.-__bn_post4x_internal
 ___
 }
@@ -2088,10 +2094,12 @@
 .type	bn_from_montgomery,\@abi-omnipotent
 .align	32
 bn_from_montgomery:
+.cfi_startproc
 	testl	\$7,`($win64?"48(%rsp)":"%r9d")`
 	jz	bn_from_mont8x
 	xor	%eax,%eax
 	ret
+.cfi_endproc
 .size	bn_from_montgomery,.-bn_from_montgomery
 
 .type	bn_from_mont8x,\@function,6
@@ -2388,6 +2396,7 @@
 .type	mulx4x_internal,\@abi-omnipotent
 .align	32
 mulx4x_internal:
+.cfi_startproc
 	mov	$num,8(%rsp)		# save -$num (it was in bytes)
 	mov	$num,%r10
 	neg	$num			# restore $num
@@ -2738,6 +2747,7 @@
 	mov	8*2(%rbp),%r14
 	mov	8*3(%rbp),%r15
 	jmp	.Lsqrx4x_sub_entry	# common post-condition
+.cfi_endproc
 .size	mulx4x_internal,.-mulx4x_internal
 ___
 }{
@@ -3542,7 +3552,9 @@
 my ($rptr,$nptr)=("%rdx","%rbp");
 $code.=<<___;
 .align	32
+.type	__bn_postx4x_internal,\@abi-omnipotent
 __bn_postx4x_internal:
+.cfi_startproc
 	mov	8*0($nptr),%r12
 	mov	%rcx,%r10		# -$num
 	mov	%rcx,%r9		# -$num
@@ -3590,6 +3602,7 @@
 	neg	%r9			# restore $num
 
 	ret
+.cfi_endproc
 .size	__bn_postx4x_internal,.-__bn_postx4x_internal
 ___
 }
@@ -3606,6 +3619,7 @@
 .type	bn_scatter5,\@abi-omnipotent
 .align	16
 bn_scatter5:
+.cfi_startproc
 	cmp	\$0, $num
 	jz	.Lscatter_epilogue
 	lea	($tbl,$idx,8),$tbl
@@ -3618,15 +3632,18 @@
 	jnz	.Lscatter
 .Lscatter_epilogue:
 	ret
+.cfi_endproc
 .size	bn_scatter5,.-bn_scatter5
 
 .globl	bn_gather5
 .type	bn_gather5,\@abi-omnipotent
 .align	32
 bn_gather5:
+.cfi_startproc
 .LSEH_begin_bn_gather5:			# Win64 thing, but harmless in other cases
 	# I can't trust assembler to use specific encoding:-(
 	.byte	0x4c,0x8d,0x14,0x24			#lea    (%rsp),%r10
+.cfi_def_cfa_register	%r10
 	.byte	0x48,0x81,0xec,0x08,0x01,0x00,0x00	#sub	$0x108,%rsp
 	lea	.Linc(%rip),%rax
 	and	\$-16,%rsp		# shouldn't be formally required
@@ -3707,8 +3724,10 @@
 	jnz	.Lgather
 
 	lea	(%r10),%rsp
+.cfi_def_cfa_register	%rsp
 	ret
 .LSEH_end_bn_gather5:
+.cfi_endproc
 .size	bn_gather5,.-bn_gather5
 ___
 }
diff --git a/crypto/fipsmodule/bn/bn_test.cc b/crypto/fipsmodule/bn/bn_test.cc
index b7d98da..a61d6e1 100644
--- a/crypto/fipsmodule/bn/bn_test.cc
+++ b/crypto/fipsmodule/bn/bn_test.cc
@@ -2405,6 +2405,55 @@
 }
 #endif   // OPENSSL_BN_ASM_MONT && SUPPORTS_ABI_TEST
 
+#if defined(OPENSSL_BN_ASM_MONT5) && defined(SUPPORTS_ABI_TEST)
+TEST_F(BNTest, BNMulMont5ABI) {
+  for (size_t words : {4, 5, 6, 7, 8, 16, 32}) {
+    SCOPED_TRACE(words);
+
+    bssl::UniquePtr<BIGNUM> m(BN_new());
+    ASSERT_TRUE(m);
+    ASSERT_TRUE(BN_set_bit(m.get(), 0));
+    ASSERT_TRUE(BN_set_bit(m.get(), words * BN_BITS2 - 1));
+    bssl::UniquePtr<BN_MONT_CTX> mont(
+        BN_MONT_CTX_new_for_modulus(m.get(), ctx()));
+    ASSERT_TRUE(mont);
+
+    std::vector<BN_ULONG> r(words), a(words), b(words), table(words * 32);
+    a[0] = 1;
+    b[0] = 42;
+
+    bn_mul_mont(r.data(), a.data(), b.data(), mont->N.d, mont->n0, words);
+    CHECK_ABI(bn_scatter5, r.data(), words, table.data(), 13);
+    for (size_t i = 0; i < 32; i++) {
+      bn_mul_mont(r.data(), a.data(), b.data(), mont->N.d, mont->n0, words);
+      bn_scatter5(r.data(), words, table.data(), i);
+    }
+    CHECK_ABI(bn_gather5, r.data(), words, table.data(), 13);
+
+    CHECK_ABI(bn_mul_mont_gather5, r.data(), r.data(), table.data(), m->d,
+              mont->n0, words, 13);
+    CHECK_ABI(bn_mul_mont_gather5, r.data(), a.data(), table.data(), m->d,
+              mont->n0, words, 13);
+
+    if (words % 8 == 0) {
+      CHECK_ABI(bn_power5, r.data(), r.data(), table.data(), m->d, mont->n0,
+                words, 13);
+      CHECK_ABI(bn_power5, r.data(), a.data(), table.data(), m->d, mont->n0,
+                words, 13);
+      EXPECT_EQ(1, CHECK_ABI(bn_from_montgomery, r.data(), r.data(), nullptr,
+                             m->d, mont->n0, words));
+      EXPECT_EQ(1, CHECK_ABI(bn_from_montgomery, r.data(), a.data(), nullptr,
+                             m->d, mont->n0, words));
+    } else {
+      EXPECT_EQ(0, CHECK_ABI(bn_from_montgomery, r.data(), r.data(), nullptr,
+                             m->d, mont->n0, words));
+      EXPECT_EQ(0, CHECK_ABI(bn_from_montgomery, r.data(), a.data(), nullptr,
+                             m->d, mont->n0, words));
+    }
+  }
+}
+#endif  // OPENSSL_BN_ASM_MONT5 && SUPPORTS_ABI_TEST
+
 #if defined(RSAZ_ENABLED) && defined(SUPPORTS_ABI_TEST)
 TEST_F(BNTest, RSAZABI) {
   if (!rsaz_avx2_capable()) {
diff --git a/crypto/fipsmodule/bn/exponentiation.c b/crypto/fipsmodule/bn/exponentiation.c
index 29e7877..9e40811 100644
--- a/crypto/fipsmodule/bn/exponentiation.c
+++ b/crypto/fipsmodule/bn/exponentiation.c
@@ -120,22 +120,6 @@
 #include "rsaz_exp.h"
 
 
-#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64)
-#define OPENSSL_BN_ASM_MONT5
-
-void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap,
-                         const BN_ULONG *table, const BN_ULONG *np,
-                         const BN_ULONG *n0, int num, int power);
-void bn_scatter5(const BN_ULONG *inp, size_t num, BN_ULONG *table,
-                 size_t power);
-void bn_gather5(BN_ULONG *out, size_t num, BN_ULONG *table, size_t power);
-void bn_power5(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table,
-               const BN_ULONG *np, const BN_ULONG *n0, int num, int power);
-int bn_from_montgomery(BN_ULONG *rp, const BN_ULONG *ap,
-                       const BN_ULONG *not_used, const BN_ULONG *np,
-                       const BN_ULONG *n0, int num);
-#endif
-
 int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) {
   int i, bits, ret = 0;
   BIGNUM *v, *rr;
diff --git a/crypto/fipsmodule/bn/internal.h b/crypto/fipsmodule/bn/internal.h
index 752f0dc..c1e60fe 100644
--- a/crypto/fipsmodule/bn/internal.h
+++ b/crypto/fipsmodule/bn/internal.h
@@ -357,6 +357,44 @@
                 const BN_ULONG *np, const BN_ULONG *n0, size_t num);
 #endif
 
+#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64)
+#define OPENSSL_BN_ASM_MONT5
+
+// bn_mul_mont_gather5 multiples loads index |power| of |table|, multiplies it
+// by |ap| modulo |np|, and stores the result in |rp|. The values are |num|
+// words long and represented in Montgomery form. |n0| is a pointer to the
+// corresponding field in |BN_MONT_CTX|.
+void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap,
+                         const BN_ULONG *table, const BN_ULONG *np,
+                         const BN_ULONG *n0, int num, int power);
+
+// bn_scatter5 stores |inp| to index |power| of |table|. |inp| and each entry of
+// |table| are |num| words long. |power| must be less than 32. |table| must be
+// 32*|num| words long.
+void bn_scatter5(const BN_ULONG *inp, size_t num, BN_ULONG *table,
+                 size_t power);
+
+// bn_gather5 loads index |power| of |table| and stores it in |out|. |out| and
+// each entry of |table| are |num| words long. |power| must be less than 32.
+void bn_gather5(BN_ULONG *out, size_t num, BN_ULONG *table, size_t power);
+
+// bn_power5 squares |ap| five times and multiplies it by the value stored at
+// index |power| of |table|, modulo |np|. It stores the result in |rp|. The
+// values are |num| words long and represented in Montgomery form. |n0| is a
+// pointer to the corresponding field in |BN_MONT_CTX|. |num| must be divisible
+// by 8.
+void bn_power5(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table,
+               const BN_ULONG *np, const BN_ULONG *n0, int num, int power);
+
+// bn_from_montgomery converts |ap| from Montgomery form modulo |np| and writes
+// the result in |rp|, each of which is |num| words long. It returns one on
+// success and zero if it cannot handle inputs of length |num|. |n0| is a
+// pointer to the corresponding field in |BN_MONT_CTX|.
+int bn_from_montgomery(BN_ULONG *rp, const BN_ULONG *ap,
+                       const BN_ULONG *not_used, const BN_ULONG *np,
+                       const BN_ULONG *n0, int num);
+#endif  // !OPENSSL_NO_ASM && OPENSSL_X86_64
+
 uint64_t bn_mont_n0(const BIGNUM *n);
 
 // bn_mod_exp_base_2_consttime calculates r = 2**p (mod n). |p| must be larger