Fix some timing leaks in the DSA code.

The DSA code is deprecated and will, hopefully, be removed in the future.
Nonetheless, this is easy enough to fix. It's the analog of the work we'd
already done for ECDSA.

- Document more clearly that we don't care about the DSA code.

- Use the existing constant-time modular addition function rather than
  the ad-hoc code.

- Reduce the digest to satisfy modular operations' invariants. (The
  underlying algorithms could accept looser bounds, but we reduce for
  simplicity.) There's no particular reason to do this in constant time,
  but we have the code for it, so we may as well.

- This additionally adds a missing check that num_bits(q) is a multiple
  of 8. We otherwise don't compute the right answer. Verification
  already rejected all 160-, 224-, and 256-bit keys, and we only
  generate DSA parameters where the length of q matches some hash
  function's length, so this is unlikely to cause anyone trouble.

- Use Montgomery reduction to perform the modular multiplication. This
  could be optimized to save a couple Montgomery reductions as in ECDSA,
  but DSA is deprecated, so I haven't bothered optimizing this.

- The reduction from g^k (mod p) to r = g^k (mod p) (mod q) is left
  in variable time, but reversing it would require a discrete log
  anyway. (The corresponding ECDSA operation is much easier to make
  constant-time due to Hasse's theorem, though that's actually still a
  TODO. I need to finish lifting EC_FELEM up the stack.)

Thanks to Keegan Ryan from NCC Group for reporting the modular addition issue
(CVE-2018-0495). The remainder is stuff I noticed along the way.

Update-Note: See the num_bits(q) change.

Change-Id: I4f032b041e2aeb09f9737a39f178c24e6a7fa1cb
Reviewed-on: https://boringssl-review.googlesource.com/29145
Commit-Queue: Adam Langley <agl@google.com>
Reviewed-by: Adam Langley <agl@google.com>
CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
diff --git a/crypto/dsa/dsa.c b/crypto/dsa/dsa.c
index b97806b..7adde07 100644
--- a/crypto/dsa/dsa.c
+++ b/crypto/dsa/dsa.c
@@ -541,6 +541,22 @@
   OPENSSL_free(sig);
 }
 
+// mod_mul_consttime sets |r| to |a| * |b| modulo |mont->N|, treating |a| and
+// |b| as secret. This function internally uses Montgomery reduction, but
+// neither inputs nor outputs are in Montgomery form.
+static int mod_mul_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
+                             const BN_MONT_CTX *mont, BN_CTX *ctx) {
+  BN_CTX_start(ctx);
+  BIGNUM *tmp = BN_CTX_get(ctx);
+  // |BN_mod_mul_montgomery| removes a factor of R, so we cancel it with a
+  // single |BN_to_montgomery| which adds one factor of R.
+  int ok = tmp != NULL &&
+           BN_to_montgomery(tmp, a, mont, ctx) &&
+           BN_mod_mul_montgomery(r, tmp, b, mont, ctx);
+  BN_CTX_end(ctx);
+  return ok;
+}
+
 DSA_SIG *DSA_do_sign(const uint8_t *digest, size_t digest_len, const DSA *dsa) {
   BIGNUM *kinv = NULL, *r = NULL, *s = NULL;
   BIGNUM m;
@@ -557,6 +573,14 @@
     goto err;
   }
 
+  // We only support DSA keys that are a multiple of 8 bits. (This is a weaker
+  // check than the one in |DSA_do_check_signature|, which only allows 160-,
+  // 224-, and 256-bit keys.
+  if (BN_num_bits(dsa->q) % 8 != 0) {
+    reason = DSA_R_BAD_Q_VALUE;
+    goto err;
+  }
+
   s = BN_new();
   if (s == NULL) {
     goto err;
@@ -572,9 +596,9 @@
   }
 
   if (digest_len > BN_num_bytes(dsa->q)) {
-    // if the digest length is greater than the size of q use the
-    // BN_num_bits(dsa->q) leftmost bits of the digest, see
-    // fips 186-3, 4.2
+    // If the digest length is greater than the size of |dsa->q| use the
+    // BN_num_bits(dsa->q) leftmost bits of the digest, see FIPS 186-3, 4.2.
+    // Note the above check that |dsa->q| is a multiple of 8 bits.
     digest_len = BN_num_bytes(dsa->q);
   }
 
@@ -582,19 +606,23 @@
     goto err;
   }
 
-  // Compute  s = inv(k) (m + xr) mod q
-  if (!BN_mod_mul(&xr, dsa->priv_key, r, dsa->q, ctx)) {
-    goto err;  // s = xr
+  // |m| is bounded by 2^(num_bits(q)), which is slightly looser than q. This
+  // violates |bn_mod_add_consttime| and |mod_mul_consttime|'s preconditions.
+  // (The underlying algorithms could accept looser bounds, but we reduce for
+  // simplicity.)
+  size_t q_width = bn_minimal_width(dsa->q);
+  if (!bn_resize_words(&m, q_width) ||
+      !bn_resize_words(&xr, q_width)) {
+    goto err;
   }
-  if (!BN_add(s, &xr, &m)) {
-    goto err;  // s = m + xr
-  }
-  if (BN_cmp(s, dsa->q) > 0) {
-    if (!BN_sub(s, s, dsa->q)) {
-      goto err;
-    }
-  }
-  if (!BN_mod_mul(s, s, kinv, dsa->q, ctx)) {
+  bn_reduce_once_in_place(m.d, 0 /* no carry word */, dsa->q->d,
+                          xr.d /* scratch space */, q_width);
+
+  // Compute s = inv(k) (m + xr) mod q. Note |dsa->method_mont_q| is
+  // initialized by |dsa_sign_setup|.
+  if (!mod_mul_consttime(&xr, dsa->priv_key, r, dsa->method_mont_q, ctx) ||
+      !bn_mod_add_consttime(s, &xr, &m, dsa->q, ctx) ||
+      !mod_mul_consttime(s, s, kinv, dsa->method_mont_q, ctx)) {
     goto err;
   }
 
@@ -648,7 +676,7 @@
   }
 
   i = BN_num_bits(dsa->q);
-  // fips 186-3 allows only different sizes for q
+  // FIPS 186-3 allows only different sizes for q.
   if (i != 160 && i != 224 && i != 256) {
     OPENSSL_PUT_ERROR(DSA, DSA_R_BAD_Q_VALUE);
     return 0;
@@ -867,6 +895,13 @@
       // Compute r = (g^k mod p) mod q
       !BN_mod_exp_mont_consttime(r, dsa->g, &k, dsa->p, ctx,
                                  dsa->method_mont_p) ||
+      // Note |BN_mod| below is not constant-time and may leak information about
+      // |r|. |dsa->p| may be significantly larger than |dsa->q|, so this is not
+      // easily performed in constant-time with Montgomery reduction.
+      //
+      // However, |r| at this point is g^k (mod p). It is almost the value of
+      // |r| revealed in the signature anyway (g^k (mod p) (mod q)), going from
+      // it to |k| would require computing a discrete log.
       !BN_mod(r, r, dsa->q, ctx) ||
       // Compute part of 's = inv(k) (m + xr) mod q' using Fermat's Little
       // Theorem.
diff --git a/include/openssl/dsa.h b/include/openssl/dsa.h
index 2966f9d..a5fa767 100644
--- a/include/openssl/dsa.h
+++ b/include/openssl/dsa.h
@@ -73,6 +73,10 @@
 
 // DSA contains functions for signing and verifying with the Digital Signature
 // Algorithm.
+//
+// This module is deprecated and retained for legacy reasons only. It is not
+// considered a priority for performance or hardening work. Do not use it in
+// new code. Use Ed25519, ECDSA with P-256, or RSA instead.
 
 
 // Allocation and destruction.