Rearrange bn/generic.c

In preparation for adding aarch64 bn_add_words and bn_sub_words
implementations, rearrange this file so that it first defines the
BN_ADD_ASM and BN_MUL_ASM macros and then gates the C fallbacks on them.
This also required moving some functions around to group the add/mul
functions together.

Change-Id: I59281706db35ad3fb1186a4afd345a820f5542d2
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/56965
Reviewed-by: Bob Beck <bbe@google.com>
Commit-Queue: Bob Beck <bbe@google.com>
Commit-Queue: David Benjamin <davidben@google.com>
Auto-Submit: David Benjamin <davidben@google.com>
diff --git a/crypto/fipsmodule/bn/generic.c b/crypto/fipsmodule/bn/generic.c
index ee80a3c..628cc53 100644
--- a/crypto/fipsmodule/bn/generic.c
+++ b/crypto/fipsmodule/bn/generic.c
@@ -61,11 +61,20 @@
 #include "internal.h"
 
 
-// This file has two other implementations: x86 assembly language in
-// asm/bn-586.pl and x86_64 inline assembly in asm/x86_64-gcc.c.
-#if defined(OPENSSL_NO_ASM) || \
-    !(defined(OPENSSL_X86) ||  \
-      (defined(OPENSSL_X86_64) && (defined(__GNUC__) || defined(__clang__))))
+#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86)
+// x86: the assembly language versions are in asm/bn-586.pl.
+#define BN_ADD_ASM  // tested by the !defined(BN_ADD_ASM) guard below
+#define BN_MUL_ASM  // tested by the !defined(BN_MUL_ASM) guard below
+#endif
+
+#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && \
+    (defined(__GNUC__) || defined(__clang__))
+// x86_64 with GCC or Clang: the inline assembly is in asm/x86_64-gcc.c.
+#define BN_ADD_ASM  // tested by the !defined(BN_ADD_ASM) guard below
+#define BN_MUL_ASM  // tested by the !defined(BN_MUL_ASM) guard below
+#endif
+
+#if !defined(BN_MUL_ASM)
 
 #ifdef BN_ULLONG
 #define mul_add(r, a, w, c)               \
@@ -201,157 +210,6 @@
   }
 }
 
-#ifdef BN_ULLONG
-BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
-                      size_t n) {
-  BN_ULLONG ll = 0;
-
-  if (n == 0) {
-    return 0;
-  }
-
-  while (n & ~3) {
-    ll += (BN_ULLONG)a[0] + b[0];
-    r[0] = (BN_ULONG)ll;
-    ll >>= BN_BITS2;
-    ll += (BN_ULLONG)a[1] + b[1];
-    r[1] = (BN_ULONG)ll;
-    ll >>= BN_BITS2;
-    ll += (BN_ULLONG)a[2] + b[2];
-    r[2] = (BN_ULONG)ll;
-    ll >>= BN_BITS2;
-    ll += (BN_ULLONG)a[3] + b[3];
-    r[3] = (BN_ULONG)ll;
-    ll >>= BN_BITS2;
-    a += 4;
-    b += 4;
-    r += 4;
-    n -= 4;
-  }
-  while (n) {
-    ll += (BN_ULLONG)a[0] + b[0];
-    r[0] = (BN_ULONG)ll;
-    ll >>= BN_BITS2;
-    a++;
-    b++;
-    r++;
-    n--;
-  }
-  return (BN_ULONG)ll;
-}
-
-#else  // !BN_ULLONG
-
-BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
-                      size_t n) {
-  BN_ULONG c, l, t;
-
-  if (n == 0) {
-    return (BN_ULONG)0;
-  }
-
-  c = 0;
-  while (n & ~3) {
-    t = a[0];
-    t += c;
-    c = (t < c);
-    l = t + b[0];
-    c += (l < t);
-    r[0] = l;
-    t = a[1];
-    t += c;
-    c = (t < c);
-    l = t + b[1];
-    c += (l < t);
-    r[1] = l;
-    t = a[2];
-    t += c;
-    c = (t < c);
-    l = t + b[2];
-    c += (l < t);
-    r[2] = l;
-    t = a[3];
-    t += c;
-    c = (t < c);
-    l = t + b[3];
-    c += (l < t);
-    r[3] = l;
-    a += 4;
-    b += 4;
-    r += 4;
-    n -= 4;
-  }
-  while (n) {
-    t = a[0];
-    t += c;
-    c = (t < c);
-    l = t + b[0];
-    c += (l < t);
-    r[0] = l;
-    a++;
-    b++;
-    r++;
-    n--;
-  }
-  return (BN_ULONG)c;
-}
-
-#endif  // !BN_ULLONG
-
-BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
-                      size_t n) {
-  BN_ULONG t1, t2;
-  int c = 0;
-
-  if (n == 0) {
-    return (BN_ULONG)0;
-  }
-
-  while (n & ~3) {
-    t1 = a[0];
-    t2 = b[0];
-    r[0] = t1 - t2 - c;
-    if (t1 != t2) {
-      c = (t1 < t2);
-    }
-    t1 = a[1];
-    t2 = b[1];
-    r[1] = t1 - t2 - c;
-    if (t1 != t2) {
-      c = (t1 < t2);
-    }
-    t1 = a[2];
-    t2 = b[2];
-    r[2] = t1 - t2 - c;
-    if (t1 != t2) {
-      c = (t1 < t2);
-    }
-    t1 = a[3];
-    t2 = b[3];
-    r[3] = t1 - t2 - c;
-    if (t1 != t2) {
-      c = (t1 < t2);
-    }
-    a += 4;
-    b += 4;
-    r += 4;
-    n -= 4;
-  }
-  while (n) {
-    t1 = a[0];
-    t2 = b[0];
-    r[0] = t1 - t2 - c;
-    if (t1 != t2) {
-      c = (t1 < t2);
-    }
-    a++;
-    b++;
-    r++;
-    n--;
-  }
-  return c;
-}
-
 // mul_add_c(a,b,c0,c1,c2)  -- c+=a*b for three word number c=(c2,c1,c0)
 // mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0)
 // sqr_add_c(a,i,c0,c1,c2)  -- c+=a[i]^2 for three word number c=(c2,c1,c0)
@@ -708,4 +566,159 @@
 #undef sqr_add_c
 #undef sqr_add_c2
 
-#endif
+#endif  // !BN_MUL_ASM
+
+#if !defined(BN_ADD_ASM)
+
+#ifdef BN_ULLONG
+BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
+                      size_t n) {  // r = a + b over n words; returns carry
+  BN_ULLONG ll = 0;  // running sum; between words it holds only the carry
+
+  if (n == 0) {
+    return 0;  // no words to add, so no carry out
+  }
+
+  while (n & ~3) {  // nonzero while n >= 4: four words per pass
+    ll += (BN_ULLONG)a[0] + b[0];  // word sum plus the incoming carry
+    r[0] = (BN_ULONG)ll;  // store the sum truncated to one word
+    ll >>= BN_BITS2;  // leave the carry (assumes BN_BITS2 = BN_ULONG bits)
+    ll += (BN_ULLONG)a[1] + b[1];  // word 1: same three steps
+    r[1] = (BN_ULONG)ll;
+    ll >>= BN_BITS2;
+    ll += (BN_ULLONG)a[2] + b[2];  // word 2: same three steps
+    r[2] = (BN_ULONG)ll;
+    ll >>= BN_BITS2;
+    ll += (BN_ULLONG)a[3] + b[3];  // word 3: same three steps
+    r[3] = (BN_ULONG)ll;
+    ll >>= BN_BITS2;
+    a += 4;  // step all three pointers past the four words just handled
+    b += 4;
+    r += 4;
+    n -= 4;
+  }
+  while (n) {  // the remaining 0 to 3 words, one per pass
+    ll += (BN_ULLONG)a[0] + b[0];
+    r[0] = (BN_ULONG)ll;
+    ll >>= BN_BITS2;
+    a++;
+    b++;
+    r++;
+    n--;
+  }
+  return (BN_ULONG)ll;  // whatever carry is left after the last word
+}
+
+#else  // !BN_ULLONG
+
+BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
+                      size_t n) {  // r = a + b over n words; returns carry
+  BN_ULONG c, l, t;  // c: carry, l: result word, t: a-word plus carry
+
+  if (n == 0) {
+    return (BN_ULONG)0;  // no words to add, so no carry out
+  }
+
+  c = 0;
+  while (n & ~3) {  // nonzero while n >= 4: four words per pass
+    t = a[0];
+    t += c;  // fold the incoming carry into the a-word
+    c = (t < c);  // 1 if that addition wrapped around, else 0
+    l = t + b[0];
+    c += (l < t);  // also 1 if adding the b-word wrapped around
+    r[0] = l;
+    t = a[1];  // word 1: same six steps
+    t += c;
+    c = (t < c);
+    l = t + b[1];
+    c += (l < t);
+    r[1] = l;
+    t = a[2];  // word 2: same six steps
+    t += c;
+    c = (t < c);
+    l = t + b[2];
+    c += (l < t);
+    r[2] = l;
+    t = a[3];  // word 3: same six steps
+    t += c;
+    c = (t < c);
+    l = t + b[3];
+    c += (l < t);
+    r[3] = l;
+    a += 4;  // step all three pointers past the four words just handled
+    b += 4;
+    r += 4;
+    n -= 4;
+  }
+  while (n) {  // the remaining 0 to 3 words, one per pass
+    t = a[0];
+    t += c;
+    c = (t < c);
+    l = t + b[0];
+    c += (l < t);
+    r[0] = l;
+    a++;
+    b++;
+    r++;
+    n--;
+  }
+  return (BN_ULONG)c;  // carry out of the last word
+}
+
+#endif  // !BN_ULLONG
+
+BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
+                      size_t n) {  // r = a - b over n words; returns borrow
+  BN_ULONG t1, t2;  // current words of a and b
+  int c = 0;  // borrow passed from one word to the next (0 or 1)
+
+  if (n == 0) {
+    return (BN_ULONG)0;  // no words to subtract, so no borrow out
+  }
+
+  while (n & ~3) {  // nonzero while n >= 4: four words per pass
+    t1 = a[0];
+    t2 = b[0];
+    r[0] = t1 - t2 - c;  // subtract the b-word and the incoming borrow
+    if (t1 != t2) {  // equal words leave the incoming borrow unchanged
+      c = (t1 < t2);  // otherwise the borrow depends only on this pair
+    }
+    t1 = a[1];  // word 1: same steps
+    t2 = b[1];
+    r[1] = t1 - t2 - c;
+    if (t1 != t2) {
+      c = (t1 < t2);
+    }
+    t1 = a[2];  // word 2: same steps
+    t2 = b[2];
+    r[2] = t1 - t2 - c;
+    if (t1 != t2) {
+      c = (t1 < t2);
+    }
+    t1 = a[3];  // word 3: same steps
+    t2 = b[3];
+    r[3] = t1 - t2 - c;
+    if (t1 != t2) {
+      c = (t1 < t2);
+    }
+    a += 4;  // step all three pointers past the four words just handled
+    b += 4;
+    r += 4;
+    n -= 4;
+  }
+  while (n) {  // the remaining 0 to 3 words, one per pass
+    t1 = a[0];
+    t2 = b[0];
+    r[0] = t1 - t2 - c;
+    if (t1 != t2) {
+      c = (t1 < t2);
+    }
+    a++;
+    b++;
+    r++;
+    n--;
+  }
+  return c;  // borrow out of the last word, converted to BN_ULONG
+}
+
+#endif  // !BN_ADD_ASM