Move NEON dispatch in bn_mul_mont to C

This clears the last reference to OPENSSL_armcap_P from assembly!

Bug: 673
Change-Id: Id5d6115535742b2e980ed262d920ae28941841e8
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/65868
Reviewed-by: Bob Beck <bbe@google.com>
Commit-Queue: David Benjamin <davidben@google.com>
diff --git a/crypto/fipsmodule/bn/asm/armv4-mont.pl b/crypto/fipsmodule/bn/asm/armv4-mont.pl index dcbaee5..491cc84 100644 --- a/crypto/fipsmodule/bn/asm/armv4-mont.pl +++ b/crypto/fipsmodule/bn/asm/armv4-mont.pl
@@ -111,37 +111,13 @@ .code 32 #endif -#if __ARM_MAX_ARCH__>=7 -.align 5 -.LOPENSSL_armcap: -.word OPENSSL_armcap_P-.Lbn_mul_mont -#endif - -.global bn_mul_mont -.type bn_mul_mont,%function +.global bn_mul_mont_nohw +.type bn_mul_mont_nohw,%function .align 5 -bn_mul_mont: -.Lbn_mul_mont: +bn_mul_mont_nohw: ldr ip,[sp,#4] @ load num stmdb sp!,{r0,r2} @ sp points at argument block -#if __ARM_MAX_ARCH__>=7 - tst ip,#7 - bne .Lialu - adr r0,.Lbn_mul_mont - ldr r2,.LOPENSSL_armcap - ldr r0,[r0,r2] -#ifdef __APPLE__ - ldr r0,[r0] -#endif - tst r0,#ARMV7_NEON @ NEON available? - ldmia sp, {r0,r2} - beq .Lialu - add sp,sp,#8 - b bn_mul8x_mont_neon -.align 4 -.Lialu: -#endif cmp ip,#2 mov $num,ip @ load num #ifdef __thumb2__ @@ -292,7 +268,7 @@ moveq pc,lr @ be binary compatible with V4, yet bx lr @ interoperable with Thumb ISA:-) #endif -.size bn_mul_mont,.-bn_mul_mont +.size bn_mul_mont_nohw,.-bn_mul_mont_nohw ___ { my ($A0,$A1,$A2,$A3)=map("d$_",(0..3)); @@ -311,6 +287,7 @@ .arch armv7-a .fpu neon +.global bn_mul8x_mont_neon .type bn_mul8x_mont_neon,%function .align 5 bn_mul8x_mont_neon: @@ -744,11 +721,6 @@ } $code.=<<___; .asciz "Montgomery multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>" -.align 2 -#if __ARM_MAX_ARCH__>=7 -.comm OPENSSL_armcap_P,4,4 -.hidden OPENSSL_armcap_P -#endif ___ foreach (split("\n",$code)) {
diff --git a/crypto/fipsmodule/bn/bn_test.cc b/crypto/fipsmodule/bn/bn_test.cc index 90e0117..d62f6e4 100644 --- a/crypto/fipsmodule/bn/bn_test.cc +++ b/crypto/fipsmodule/bn/bn_test.cc
@@ -2902,6 +2902,17 @@ CHECK_ABI(bn_sqr8x_mont, r.data(), a.data(), bn_mulx_adx_capable(), mont->N.d, mont->n0, words); } +#elif defined(OPENSSL_ARM) + if (bn_mul8x_mont_neon_capable(words)) { + CHECK_ABI(bn_mul8x_mont_neon, r.data(), a.data(), b.data(), mont->N.d, + mont->n0, words); + CHECK_ABI(bn_mul8x_mont_neon, r.data(), a.data(), a.data(), mont->N.d, + mont->n0, words); + } + CHECK_ABI(bn_mul_mont_nohw, r.data(), a.data(), b.data(), mont->N.d, + mont->n0, words); + CHECK_ABI(bn_mul_mont_nohw, r.data(), a.data(), a.data(), mont->N.d, + mont->n0, words); #else CHECK_ABI(bn_mul_mont, r.data(), a.data(), b.data(), mont->N.d, mont->n0, words);
diff --git a/crypto/fipsmodule/bn/internal.h b/crypto/fipsmodule/bn/internal.h index 363a97e..0271160 100644 --- a/crypto/fipsmodule/bn/internal.h +++ b/crypto/fipsmodule/bn/internal.h
@@ -409,7 +409,7 @@ int bn_mul_mont_nohw(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, size_t num); OPENSSL_INLINE int bn_mul4x_mont_capable(size_t num) { - return (num >= 8) && ((num & 3) == 0); + return num >= 8 && (num & 3) == 0; } int bn_mul4x_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, size_t num); @@ -419,14 +419,22 @@ int bn_mulx4x_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, size_t num); OPENSSL_INLINE int bn_sqr8x_mont_capable(size_t num) { - return (num >= 8) && ((num & 7) == 0); + return num >= 8 && (num & 7) == 0; } int bn_sqr8x_mont(BN_ULONG *rp, const BN_ULONG *ap, BN_ULONG mulx_adx_capable, const BN_ULONG *np, const BN_ULONG *n0, size_t num); -#endif // defined(OPENSSL_X86_64) - +#elif defined(OPENSSL_ARM) +OPENSSL_INLINE int bn_mul8x_mont_neon_capable(size_t num) { + return (num & 7) == 0 && CRYPTO_is_NEON_capable(); +} +int bn_mul8x_mont_neon(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, + const BN_ULONG *np, const BN_ULONG *n0, size_t num); +int bn_mul_mont_nohw(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, + const BN_ULONG *np, const BN_ULONG *n0, size_t num); #endif +#endif // OPENSSL_BN_ASM_MONT + #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) #define OPENSSL_BN_ASM_MONT5
diff --git a/crypto/fipsmodule/bn/montgomery.c b/crypto/fipsmodule/bn/montgomery.c index 7a4ca2f..cf483b0 100644 --- a/crypto/fipsmodule/bn/montgomery.c +++ b/crypto/fipsmodule/bn/montgomery.c
@@ -507,8 +507,7 @@ #if defined(OPENSSL_BN_ASM_MONT) && defined(OPENSSL_X86_64) int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, - const BN_ULONG *np, const BN_ULONG *n0, size_t num) -{ + const BN_ULONG *np, const BN_ULONG *n0, size_t num) { if (ap == bp && bn_sqr8x_mont_capable(num)) { return bn_sqr8x_mont(rp, ap, bn_mulx_adx_capable(), np, n0, num); } @@ -521,3 +520,13 @@ return bn_mul_mont_nohw(rp, ap, bp, np, n0, num); } #endif + +#if defined(OPENSSL_BN_ASM_MONT) && defined(OPENSSL_ARM) +int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, + const BN_ULONG *np, const BN_ULONG *n0, size_t num) { + if (bn_mul8x_mont_neon_capable(num)) { + return bn_mul8x_mont_neon(rp, ap, bp, np, n0, num); + } + return bn_mul_mont_nohw(rp, ap, bp, np, n0, num); +} +#endif