Simplify mont5 table computation. The unrolled loops appear to have negligible perf impact: Before: Did 18480 RSA 2048 signing operations in 10005085us (1847.1 ops/sec) Did 2720 RSA 4096 signing operations in 10056337us (270.5 ops/sec) After: Did 18480 RSA 2048 signing operations in 10012218us (1845.7 ops/sec) [-0.1%] Did 2700 RSA 4096 signing operations in 10003972us (269.9 ops/sec) [-0.2%] Change-Id: I29073c373a03a9798f6e04016626e6ab910e893a Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/52826 Commit-Queue: David Benjamin <davidben@google.com> Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/fipsmodule/bn/exponentiation.c b/crypto/fipsmodule/bn/exponentiation.c index 38013ed..e2e0d12 100644 --- a/crypto/fipsmodule/bn/exponentiation.c +++ b/crypto/fipsmodule/bn/exponentiation.c
@@ -1074,7 +1074,7 @@ bn_scatter5(tmp.d, top, powerbuf, i); } // Compute odd powers |i| based on |i - 1|, then all powers |i * 2^j|. - for (i = 3; i < 8; i += 2) { + for (i = 3; i < 32; i += 2) { bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); bn_scatter5(tmp.d, top, powerbuf, i); for (int j = 2 * i; j < 32; j *= 2) { @@ -1082,17 +1082,6 @@ bn_scatter5(tmp.d, top, powerbuf, j); } } - // These two loops are the above with the inner loop unrolled. - for (; i < 16; i += 2) { - bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); - bn_scatter5(tmp.d, top, powerbuf, i); - bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); - bn_scatter5(tmp.d, top, powerbuf, 2 * i); - } - for (; i < 32; i += 2) { - bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); - bn_scatter5(tmp.d, top, powerbuf, i); - } bits--; for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--) {