Remove non-ASM version of |bn_mul_mont| in bn/generic.c.
When building in OPENSSL_NO_ASM mode, MSVC complains about unreachable
code. The redundant initialization of |i| is the main problem. The
skipping of the first test of the condition |i < num| with |goto| was
also confusing.
It turns out that |bn_mul_mont| is only called when assembly language
optimizations are available, but in that case the assembly language
versions will always be used instead. Although this code will be
compiled in |OPENSSL_NO_ASM| builds, it is never called in
|OPENSSL_NO_ASM| builds. Thus, it can just be removed.
Change-Id: Id551899b2602824978edc1a1cb0703b76516808d
Reviewed-on: https://boringssl-review.googlesource.com/5550
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/bn/generic.c b/crypto/bn/generic.c
index c240a54..7fd4819 100644
--- a/crypto/bn/generic.c
+++ b/crypto/bn/generic.c
@@ -1022,110 +1022,4 @@
r[7] = c2;
}
-#if defined(OPENSSL_NO_ASM) || (!defined(OPENSSL_ARM) && !defined(OPENSSL_X86_64))
-/* This is essentially reference implementation, which may or may not
- * result in performance improvement. E.g. on IA-32 this routine was
- * observed to give 40% faster rsa1024 private key operations and 10%
- * faster rsa4096 ones, while on AMD64 it improves rsa1024 sign only
- * by 10% and *worsens* rsa4096 sign by 15%. Once again, it's a
- * reference implementation, one to be used as starting point for
- * platform-specific assembler. Mentioned numbers apply to compiler
- * generated code compiled with and without -DOPENSSL_BN_ASM_MONT and
- * can vary not only from platform to platform, but even for compiler
- * versions. Assembler vs. assembler improvement coefficients can
- * [and are known to] differ and are to be documented elsewhere. */
-int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
- const BN_ULONG *np, const BN_ULONG *n0p, int num) {
- BN_ULONG c0, c1, ml, *tp, n0;
-#ifdef mul64
- BN_ULONG mh;
-#endif
- volatile BN_ULONG *vp;
- int i = 0, j;
-
-#if 0 /* template for platform-specific implementation */
- if (ap==bp) return bn_sqr_mont(rp,ap,np,n0p,num);
-#endif
- vp = tp = alloca((num + 2) * sizeof(BN_ULONG));
-
- n0 = *n0p;
-
- c0 = 0;
- ml = bp[0];
-#ifdef mul64
- mh = HBITS(ml);
- ml = LBITS(ml);
- for (j = 0; j < num; ++j) {
- mul(tp[j], ap[j], ml, mh, c0);
- }
-#else
- for (j = 0; j < num; ++j) {
- mul(tp[j], ap[j], ml, c0);
- }
-#endif
-
- tp[num] = c0;
- tp[num + 1] = 0;
- goto enter;
-
- for (; i < num; i++) {
- c0 = 0;
- ml = bp[i];
-#ifdef mul64
- mh = HBITS(ml);
- ml = LBITS(ml);
- for (j = 0; j < num; ++j) {
- mul_add(tp[j], ap[j], ml, mh, c0);
- }
-#else
- for (j = 0; j < num; ++j) {
- mul_add(tp[j], ap[j], ml, c0);
- }
-#endif
- c1 = (tp[num] + c0) & BN_MASK2;
- tp[num] = c1;
- tp[num + 1] = (c1 < c0 ? 1 : 0);
- enter:
- c1 = tp[0];
- ml = (c1 * n0) & BN_MASK2;
- c0 = 0;
-#ifdef mul64
- mh = HBITS(ml);
- ml = LBITS(ml);
- mul_add(c1, np[0], ml, mh, c0);
-#else
- mul_add(c1, ml, np[0], c0);
-#endif
- for (j = 1; j < num; j++) {
- c1 = tp[j];
-#ifdef mul64
- mul_add(c1, np[j], ml, mh, c0);
-#else
- mul_add(c1, ml, np[j], c0);
-#endif
- tp[j - 1] = c1 & BN_MASK2;
- }
- c1 = (tp[num] + c0) & BN_MASK2;
- tp[num - 1] = c1;
- tp[num] = tp[num + 1] + (c1 < c0 ? 1 : 0);
- }
-
- if (tp[num] != 0 || tp[num - 1] >= np[num - 1]) {
- c0 = bn_sub_words(rp, tp, np, num);
- if (tp[num] != 0 || c0 == 0) {
- for (i = 0; i < num + 2; i++) {
- vp[i] = 0;
- }
- return 1;
- }
- }
- for (i = 0; i < num; i++) {
- rp[i] = tp[i], vp[i] = 0;
- }
- vp[num] = 0;
- vp[num + 1] = 0;
- return 1;
-}
-#endif
-
#endif