Fix div.c to divide BN_ULLONG only if BN_CAN_DIVIDE_ULLONG defined.

Since clang-cl uses __udivti3 for __uint128_t division, linking div.obj
fails.  Let me make div.c use BN_CAN_DIVIDE_ULLONG to decide using
__uint128_t division instead of BN_ULLONG.

Bug: https://bugs.chromium.org/p/chromium/issues/detail?id=787617
Change-Id: I3ebe245f6b8917d59409591992efbabddea08187
Reviewed-on: https://boringssl-review.googlesource.com/c/32404
Commit-Queue: David Benjamin <davidben@google.com>
Reviewed-by: David Benjamin <davidben@google.com>
CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
diff --git a/crypto/fipsmodule/bn/div.c b/crypto/fipsmodule/bn/div.c
index 57485bd..27b591c 100644
--- a/crypto/fipsmodule/bn/div.c
+++ b/crypto/fipsmodule/bn/div.c
@@ -64,7 +64,7 @@
 #include "internal.h"
 
 
-#if !defined(BN_ULLONG)
+#if !defined(BN_CAN_DIVIDE_ULLONG) && !defined(BN_CAN_USE_INLINE_ASM)
 // bn_div_words divides a double-width |h|,|l| by |d| and returns the result,
 // which must fit in a |BN_ULONG|.
 static BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) {
@@ -135,7 +135,7 @@
   ret |= q;
   return ret;
 }
-#endif  // !defined(BN_ULLONG)
+#endif  // !defined(BN_CAN_DIVIDE_ULLONG) && !defined(BN_CAN_USE_INLINE_ASM)
 
 static inline void bn_div_rem_words(BN_ULONG *quotient_out, BN_ULONG *rem_out,
                                     BN_ULONG n0, BN_ULONG n1, BN_ULONG d0) {
@@ -155,20 +155,18 @@
   //
   // These issues aren't specific to x86 and x86_64, so it might be worthwhile
   // to add more assembly language implementations.
-#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && \
-    (defined(__GNUC__) || defined(__clang__))
+#if defined(BN_CAN_USE_INLINE_ASM) && defined(OPENSSL_X86)
   __asm__ volatile("divl %4"
                    : "=a"(*quotient_out), "=d"(*rem_out)
                    : "a"(n1), "d"(n0), "rm"(d0)
                    : "cc");
-#elif !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && \
-    (defined(__GNUC__) || defined(__clang__))
+#elif defined(BN_CAN_USE_INLINE_ASM) && defined(OPENSSL_X86_64)
   __asm__ volatile("divq %4"
                    : "=a"(*quotient_out), "=d"(*rem_out)
                    : "a"(n1), "d"(n0), "rm"(d0)
                    : "cc");
 #else
-#if defined(BN_ULLONG)
+#if defined(BN_CAN_DIVIDE_ULLONG)
   BN_ULLONG n = (((BN_ULLONG)n0) << BN_BITS2) | n1;
   *quotient_out = (BN_ULONG)(n / d0);
 #else
diff --git a/crypto/fipsmodule/bn/internal.h b/crypto/fipsmodule/bn/internal.h
index 9796831..fb8d11f 100644
--- a/crypto/fipsmodule/bn/internal.h
+++ b/crypto/fipsmodule/bn/internal.h
@@ -185,6 +185,10 @@
 #error "Must define either OPENSSL_32_BIT or OPENSSL_64_BIT"
 #endif
 
+#if !defined(OPENSSL_NO_ASM) && (defined(__GNUC__) || defined(__clang__))
+#define BN_CAN_USE_INLINE_ASM
+#endif
+
 // |BN_mod_exp_mont_consttime| is based on the assumption that the L1 data
 // cache line width of the target processor is at least the following value.
 #define MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH 64