Add ABI tests for x86_64-mont5.pl. Fix some missing CFI bits. Change-Id: I42114527f0ef8e03079d37a9f466d64a63a313f5 Reviewed-on: https://boringssl-review.googlesource.com/c/34864 Commit-Queue: David Benjamin <davidben@google.com> Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/fipsmodule/bn/asm/x86_64-mont5.pl b/crypto/fipsmodule/bn/asm/x86_64-mont5.pl index 442e696..abcfe6a 100755 --- a/crypto/fipsmodule/bn/asm/x86_64-mont5.pl +++ b/crypto/fipsmodule/bn/asm/x86_64-mont5.pl
@@ -566,6 +566,7 @@ .type mul4x_internal,\@abi-omnipotent .align 32 mul4x_internal: +.cfi_startproc shl \$5,$num # $num was in bytes movd `($win64?56:8)`(%rax),%xmm5 # load 7th argument, index lea .Linc(%rip),%rax @@ -1060,6 +1061,7 @@ ___ } $code.=<<___; +.cfi_endproc .size mul4x_internal,.-mul4x_internal ___ }}} @@ -1226,6 +1228,7 @@ .align 32 bn_sqr8x_internal: __bn_sqr8x_internal: +.cfi_startproc ############################################################## # Squaring part: # @@ -2017,6 +2020,7 @@ cmp %rdx,$tptr # end of t[]? jb .L8x_reduction_loop ret +.cfi_endproc .size bn_sqr8x_internal,.-bn_sqr8x_internal ___ } @@ -2029,6 +2033,7 @@ .type __bn_post4x_internal,\@abi-omnipotent .align 32 __bn_post4x_internal: +.cfi_startproc mov 8*0($nptr),%r12 lea (%rdi,$num),$tptr # %rdi was $tptr above mov $num,%rcx @@ -2079,6 +2084,7 @@ mov $num,%r10 # prepare for back-to-back call neg $num # restore $num ret +.cfi_endproc .size __bn_post4x_internal,.-__bn_post4x_internal ___ } @@ -2088,10 +2094,12 @@ .type bn_from_montgomery,\@abi-omnipotent .align 32 bn_from_montgomery: +.cfi_startproc testl \$7,`($win64?"48(%rsp)":"%r9d")` jz bn_from_mont8x xor %eax,%eax ret +.cfi_endproc .size bn_from_montgomery,.-bn_from_montgomery .type bn_from_mont8x,\@function,6 @@ -2388,6 +2396,7 @@ .type mulx4x_internal,\@abi-omnipotent .align 32 mulx4x_internal: +.cfi_startproc mov $num,8(%rsp) # save -$num (it was in bytes) mov $num,%r10 neg $num # restore $num @@ -2738,6 +2747,7 @@ mov 8*2(%rbp),%r14 mov 8*3(%rbp),%r15 jmp .Lsqrx4x_sub_entry # common post-condition +.cfi_endproc .size mulx4x_internal,.-mulx4x_internal ___ }{ @@ -3542,7 +3552,9 @@ my ($rptr,$nptr)=("%rdx","%rbp"); $code.=<<___; .align 32 +.type __bn_postx4x_internal,\@abi-omnipotent __bn_postx4x_internal: +.cfi_startproc mov 8*0($nptr),%r12 mov %rcx,%r10 # -$num mov %rcx,%r9 # -$num @@ -3590,6 +3602,7 @@ neg %r9 # restore $num ret +.cfi_endproc .size __bn_postx4x_internal,.-__bn_postx4x_internal ___ } @@ -3606,6 +3619,7 @@ 
.type bn_scatter5,\@abi-omnipotent .align 16 bn_scatter5: +.cfi_startproc cmp \$0, $num jz .Lscatter_epilogue lea ($tbl,$idx,8),$tbl @@ -3618,15 +3632,18 @@ jnz .Lscatter .Lscatter_epilogue: ret +.cfi_endproc .size bn_scatter5,.-bn_scatter5 .globl bn_gather5 .type bn_gather5,\@abi-omnipotent .align 32 bn_gather5: +.cfi_startproc .LSEH_begin_bn_gather5: # Win64 thing, but harmless in other cases # I can't trust assembler to use specific encoding:-( .byte 0x4c,0x8d,0x14,0x24 #lea (%rsp),%r10 +.cfi_def_cfa_register %r10 .byte 0x48,0x81,0xec,0x08,0x01,0x00,0x00 #sub $0x108,%rsp lea .Linc(%rip),%rax and \$-16,%rsp # shouldn't be formally required @@ -3707,8 +3724,10 @@ jnz .Lgather lea (%r10),%rsp +.cfi_def_cfa_register %rsp ret .LSEH_end_bn_gather5: +.cfi_endproc .size bn_gather5,.-bn_gather5 ___ }
diff --git a/crypto/fipsmodule/bn/bn_test.cc b/crypto/fipsmodule/bn/bn_test.cc index b7d98da..a61d6e1 100644 --- a/crypto/fipsmodule/bn/bn_test.cc +++ b/crypto/fipsmodule/bn/bn_test.cc
@@ -2405,6 +2405,55 @@ } #endif // OPENSSL_BN_ASM_MONT && SUPPORTS_ABI_TEST +#if defined(OPENSSL_BN_ASM_MONT5) && defined(SUPPORTS_ABI_TEST) +TEST_F(BNTest, BNMulMont5ABI) { + for (size_t words : {4, 5, 6, 7, 8, 16, 32}) { + SCOPED_TRACE(words); + + bssl::UniquePtr<BIGNUM> m(BN_new()); + ASSERT_TRUE(m); + ASSERT_TRUE(BN_set_bit(m.get(), 0)); + ASSERT_TRUE(BN_set_bit(m.get(), words * BN_BITS2 - 1)); + bssl::UniquePtr<BN_MONT_CTX> mont( + BN_MONT_CTX_new_for_modulus(m.get(), ctx())); + ASSERT_TRUE(mont); + + std::vector<BN_ULONG> r(words), a(words), b(words), table(words * 32); + a[0] = 1; + b[0] = 42; + + bn_mul_mont(r.data(), a.data(), b.data(), mont->N.d, mont->n0, words); + CHECK_ABI(bn_scatter5, r.data(), words, table.data(), 13); + for (size_t i = 0; i < 32; i++) { + bn_mul_mont(r.data(), a.data(), b.data(), mont->N.d, mont->n0, words); + bn_scatter5(r.data(), words, table.data(), i); + } + CHECK_ABI(bn_gather5, r.data(), words, table.data(), 13); + + CHECK_ABI(bn_mul_mont_gather5, r.data(), r.data(), table.data(), m->d, + mont->n0, words, 13); + CHECK_ABI(bn_mul_mont_gather5, r.data(), a.data(), table.data(), m->d, + mont->n0, words, 13); + + if (words % 8 == 0) { + CHECK_ABI(bn_power5, r.data(), r.data(), table.data(), m->d, mont->n0, + words, 13); + CHECK_ABI(bn_power5, r.data(), a.data(), table.data(), m->d, mont->n0, + words, 13); + EXPECT_EQ(1, CHECK_ABI(bn_from_montgomery, r.data(), r.data(), nullptr, + m->d, mont->n0, words)); + EXPECT_EQ(1, CHECK_ABI(bn_from_montgomery, r.data(), a.data(), nullptr, + m->d, mont->n0, words)); + } else { + EXPECT_EQ(0, CHECK_ABI(bn_from_montgomery, r.data(), r.data(), nullptr, + m->d, mont->n0, words)); + EXPECT_EQ(0, CHECK_ABI(bn_from_montgomery, r.data(), a.data(), nullptr, + m->d, mont->n0, words)); + } + } +} +#endif // OPENSSL_BN_ASM_MONT5 && SUPPORTS_ABI_TEST + #if defined(RSAZ_ENABLED) && defined(SUPPORTS_ABI_TEST) TEST_F(BNTest, RSAZABI) { if (!rsaz_avx2_capable()) {
diff --git a/crypto/fipsmodule/bn/exponentiation.c b/crypto/fipsmodule/bn/exponentiation.c index 29e7877..9e40811 100644 --- a/crypto/fipsmodule/bn/exponentiation.c +++ b/crypto/fipsmodule/bn/exponentiation.c
@@ -120,22 +120,6 @@ #include "rsaz_exp.h" -#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) -#define OPENSSL_BN_ASM_MONT5 - -void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap, - const BN_ULONG *table, const BN_ULONG *np, - const BN_ULONG *n0, int num, int power); -void bn_scatter5(const BN_ULONG *inp, size_t num, BN_ULONG *table, - size_t power); -void bn_gather5(BN_ULONG *out, size_t num, BN_ULONG *table, size_t power); -void bn_power5(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, - const BN_ULONG *np, const BN_ULONG *n0, int num, int power); -int bn_from_montgomery(BN_ULONG *rp, const BN_ULONG *ap, - const BN_ULONG *not_used, const BN_ULONG *np, - const BN_ULONG *n0, int num); -#endif - int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) { int i, bits, ret = 0; BIGNUM *v, *rr;
diff --git a/crypto/fipsmodule/bn/internal.h b/crypto/fipsmodule/bn/internal.h index 752f0dc..c1e60fe 100644 --- a/crypto/fipsmodule/bn/internal.h +++ b/crypto/fipsmodule/bn/internal.h
@@ -357,6 +357,44 @@ const BN_ULONG *np, const BN_ULONG *n0, size_t num); #endif +#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) +#define OPENSSL_BN_ASM_MONT5 + +// bn_mul_mont_gather5 loads index |power| of |table|, multiplies it +// by |ap| modulo |np|, and stores the result in |rp|. The values are |num| +// words long and represented in Montgomery form. |n0| is a pointer to the +// corresponding field in |BN_MONT_CTX|. +void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap, + const BN_ULONG *table, const BN_ULONG *np, + const BN_ULONG *n0, int num, int power); + +// bn_scatter5 stores |inp| to index |power| of |table|. |inp| and each entry of +// |table| are |num| words long. |power| must be less than 32. |table| must be +// 32*|num| words long. +void bn_scatter5(const BN_ULONG *inp, size_t num, BN_ULONG *table, + size_t power); + +// bn_gather5 loads index |power| of |table| and stores it in |out|. |out| and +// each entry of |table| are |num| words long. |power| must be less than 32. +void bn_gather5(BN_ULONG *out, size_t num, BN_ULONG *table, size_t power); + +// bn_power5 squares |ap| five times and multiplies it by the value stored at +// index |power| of |table|, modulo |np|. It stores the result in |rp|. The +// values are |num| words long and represented in Montgomery form. |n0| is a +// pointer to the corresponding field in |BN_MONT_CTX|. |num| must be divisible +// by 8. +void bn_power5(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table, + const BN_ULONG *np, const BN_ULONG *n0, int num, int power); + +// bn_from_montgomery converts |ap| from Montgomery form modulo |np| and writes +// the result in |rp|, each of which is |num| words long. It returns one on +// success and zero if it cannot handle inputs of length |num|. |n0| is a +// pointer to the corresponding field in |BN_MONT_CTX|. 
+int bn_from_montgomery(BN_ULONG *rp, const BN_ULONG *ap, + const BN_ULONG *not_used, const BN_ULONG *np, + const BN_ULONG *n0, int num); +#endif // !OPENSSL_NO_ASM && OPENSSL_X86_64 + uint64_t bn_mont_n0(const BIGNUM *n); // bn_mod_exp_base_2_consttime calculates r = 2**p (mod n). |p| must be larger