Use Montgomery reduction in DSA verify

This removes some calls to BN_mod_mul, which, having no limits on its
input sizes, currently calls BN_mul. Instead, bring DSA verification in
line with ECDSA and use Montgomery reduction.

Bug: 406497222
Change-Id: Ib99e96090e31858cd9bceba576418e158fb37521
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/78067
Reviewed-by: Bob Beck <bbe@google.com>
Commit-Queue: David Benjamin <davidben@google.com>
Auto-Submit: David Benjamin <davidben@google.com>
diff --git a/crypto/dsa/dsa.cc b/crypto/dsa/dsa.cc
index c9566b1..d372c7c 100644
--- a/crypto/dsa/dsa.cc
+++ b/crypto/dsa/dsa.cc
@@ -679,9 +679,20 @@
       goto err;
     }
 
-    // Calculate W = inv(S) mod Q
-    // save W in u2
-    if (BN_mod_inverse(&u2, sig->s, dsa->q, ctx) == NULL) {
+    if (!BN_MONT_CTX_set_locked((BN_MONT_CTX **)&dsa->method_mont_p,
+                                (CRYPTO_MUTEX *)&dsa->method_mont_lock, dsa->p,
+                                ctx) ||
+        !BN_MONT_CTX_set_locked((BN_MONT_CTX **)&dsa->method_mont_q,
+                                (CRYPTO_MUTEX *)&dsa->method_mont_lock, dsa->q,
+                                ctx)) {
+      goto err;
+    }
+
+    // Calculate W = inv(S) mod Q, in the Montgomery domain. This is slightly
+    // more efficiently computed as FromMont(s)^-1 = (s * R^-1)^-1 = s^-1 * R,
+    // instead of ToMont(s^-1) = s^-1 * R.
+    if (!BN_from_montgomery(&u2, sig->s, dsa->method_mont_q, ctx) ||
+        !BN_mod_inverse(&u2, &u2, dsa->q, ctx)) {
       goto err;
     }
 
@@ -698,19 +709,15 @@
       goto err;
     }
 
-    // u1 = M * w mod q
-    if (!BN_mod_mul(&u1, &u1, &u2, dsa->q, ctx)) {
+    // u1 = M * w mod q. w was stored in the Montgomery domain while M was not,
+    // so the result will already be out of the Montgomery domain.
+    if (!BN_mod_mul_montgomery(&u1, &u1, &u2, dsa->method_mont_q, ctx)) {
       goto err;
     }
 
-    // u2 = r * w mod q
-    if (!BN_mod_mul(&u2, sig->r, &u2, dsa->q, ctx)) {
-      goto err;
-    }
-
-    if (!BN_MONT_CTX_set_locked((BN_MONT_CTX **)&dsa->method_mont_p,
-                                (CRYPTO_MUTEX *)&dsa->method_mont_lock, dsa->p,
-                                ctx)) {
+    // u2 = r * w mod q. w was stored in the Montgomery domain while r was not,
+    // so the result will already be out of the Montgomery domain.
+    if (!BN_mod_mul_montgomery(&u2, sig->r, &u2, dsa->method_mont_q, ctx)) {
       goto err;
     }
 
@@ -719,7 +726,6 @@
       goto err;
     }
 
-    // BN_copy(&u1,&t1);
     // let u1 = u1 mod q
     if (!BN_mod(&u1, &t1, dsa->q, ctx)) {
       goto err;