Simplify mont5 table computation.
The hand-unrolled loops appear to have a negligible performance impact, so
remove them and let the general loop compute the whole table:
Before:
Did 18480 RSA 2048 signing operations in 10005085us (1847.1 ops/sec)
Did 2720 RSA 4096 signing operations in 10056337us (270.5 ops/sec)
After:
Did 18480 RSA 2048 signing operations in 10012218us (1845.7 ops/sec) [-0.1%]
Did 2700 RSA 4096 signing operations in 10003972us (269.9 ops/sec) [-0.2%]
Change-Id: I29073c373a03a9798f6e04016626e6ab910e893a
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/52826
Commit-Queue: David Benjamin <davidben@google.com>
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/fipsmodule/bn/exponentiation.c b/crypto/fipsmodule/bn/exponentiation.c
index 38013ed..e2e0d12 100644
--- a/crypto/fipsmodule/bn/exponentiation.c
+++ b/crypto/fipsmodule/bn/exponentiation.c
@@ -1074,7 +1074,7 @@
bn_scatter5(tmp.d, top, powerbuf, i);
}
// Compute odd powers |i| based on |i - 1|, then all powers |i * 2^j|.
- for (i = 3; i < 8; i += 2) {
+ for (i = 3; i < 32; i += 2) {
bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
bn_scatter5(tmp.d, top, powerbuf, i);
for (int j = 2 * i; j < 32; j *= 2) {
@@ -1082,17 +1082,6 @@
bn_scatter5(tmp.d, top, powerbuf, j);
}
}
- // These two loops are the above with the inner loop unrolled.
- for (; i < 16; i += 2) {
- bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
- bn_scatter5(tmp.d, top, powerbuf, i);
- bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
- bn_scatter5(tmp.d, top, powerbuf, 2 * i);
- }
- for (; i < 32; i += 2) {
- bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
- bn_scatter5(tmp.d, top, powerbuf, i);
- }
bits--;
for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--) {