Rename bn_mul_mont to bn_mul_mont_words

https://boringssl-review.googlesource.com/c/boringssl/+/75647 tripped an
issue where some build target was linking BoringSSL with a copy of
OpenSSL. The particular combination of hidden and visible symbols meant
that specifically changing bn_mul_mont impacted things. (Internal link
for posterity: go/bn-mul-mont-symbol-collision resolves to the offending
package.)

We very much don't support that configuration, but it's easiest to just
rename this symbol for now. Long-term, I suspect we should properly
prefix our symbols. (That will probably involve putting all non-asm
internal symbols behind a C++ namespace, at which point we'll probably
want to reevaluate our naming conventions anyway[*].)

[*] I'd really like to name the new words-based multiply and square
functions bn_mul_words and bn_sqr_words, but OpenSSL has assembly
functions with those names
that do something else. go/bn-mul-mont-symbol-collision seems
unaffected, but this is perhaps a sign that we should start namespacing
our internals sooner rather than later.

Change-Id: I8c6f362fd01bce51d89b58a08531910c6f1bd0a9
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/80308
Reviewed-by: Adam Langley <agl@google.com>
Commit-Queue: David Benjamin <davidben@google.com>
Auto-Submit: David Benjamin <davidben@google.com>
diff --git a/crypto/fipsmodule/bn/asm/armv8-mont.pl b/crypto/fipsmodule/bn/asm/armv8-mont.pl
index fe4d8cf..80e0eea 100644
--- a/crypto/fipsmodule/bn/asm/armv8-mont.pl
+++ b/crypto/fipsmodule/bn/asm/armv8-mont.pl
@@ -60,7 +60,7 @@
  $lo1,$hi1,$nj,$m1,$nlo,$nhi,
  $ovf, $i,$j,$tp,$tj) = map("x$_",6..17,19..24);
 
-# void bn_mul_mont(
+# void bn_mul_mont_words(
 $rp="x0";	# BN_ULONG *rp,
 $ap="x1";	# const BN_ULONG *ap,
 $bp="x2";	# const BN_ULONG *bp,
@@ -71,10 +71,10 @@
 $code.=<<___;
 .text
 
-.globl	bn_mul_mont
-.type	bn_mul_mont,%function
+.globl	bn_mul_mont_words
+.type	bn_mul_mont_words,%function
 .align	5
-bn_mul_mont:
+bn_mul_mont_words:
 	AARCH64_SIGN_LINK_REGISTER
 	tst	$num,#7
 	b.eq	__bn_sqr8x_mont
@@ -275,7 +275,7 @@
 	ldr	x29,[sp],#64
 	AARCH64_VALIDATE_LINK_REGISTER
 	ret
-.size	bn_mul_mont,.-bn_mul_mont
+.size	bn_mul_mont_words,.-bn_mul_mont_words
 ___
 {
 ########################################################################
@@ -292,7 +292,7 @@
 .align	5
 __bn_sqr8x_mont:
 	// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_sqr8x_mont is jumped to
-	// only from bn_mul_mont which has already signed the return address.
+	// only from bn_mul_mont_words which has already signed the return address.
 	cmp	$ap,$bp
 	b.ne	__bn_mul4x_mont
 .Lsqr8x_mont:
@@ -1075,7 +1075,7 @@
 .align	5
 __bn_mul4x_mont:
 	// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_mul4x_mont is jumped to
-	// only from bn_mul_mont or __bn_mul8x_mont which have already signed the
+	// only from bn_mul_mont_words or __bn_mul8x_mont which have already signed the
 	// return address.
 	stp	x29,x30,[sp,#-128]!
 	add	x29,sp,#0
diff --git a/crypto/fipsmodule/bn/asm/x86-mont.pl b/crypto/fipsmodule/bn/asm/x86-mont.pl
index c3e30cb..59cfea6 100755
--- a/crypto/fipsmodule/bn/asm/x86-mont.pl
+++ b/crypto/fipsmodule/bn/asm/x86-mont.pl
@@ -49,7 +49,7 @@
 
 $sse2=1;
 
-&function_begin("bn_mul_mont");
+&function_begin("bn_mul_mont_words");
 
 $i="edx";
 $j="ecx";
@@ -325,7 +325,7 @@
 
 	&mov	("esp",$_sp);		# pull saved stack pointer
 	# No return value
-&function_end("bn_mul_mont");
+&function_end("bn_mul_mont_words");
 
 &asciz("Montgomery Multiplication for x86, CRYPTOGAMS by <appro\@openssl.org>");
 
diff --git a/crypto/fipsmodule/bn/bn_test.cc b/crypto/fipsmodule/bn/bn_test.cc
index 7c1ad40..6d241a0 100644
--- a/crypto/fipsmodule/bn/bn_test.cc
+++ b/crypto/fipsmodule/bn/bn_test.cc
@@ -2858,10 +2858,10 @@
     CHECK_ABI(bn_mul_mont_nohw, r.data(), a.data(), a.data(), mont->N.d,
               mont->n0, words);
 #else
-    CHECK_ABI(bn_mul_mont, r.data(), a.data(), b.data(), mont->N.d, mont->n0,
-              words);
-    CHECK_ABI(bn_mul_mont, r.data(), a.data(), a.data(), mont->N.d, mont->n0,
-              words);
+    CHECK_ABI(bn_mul_mont_words, r.data(), a.data(), b.data(), mont->N.d,
+              mont->n0, words);
+    CHECK_ABI(bn_mul_mont_words, r.data(), a.data(), a.data(), mont->N.d,
+              mont->n0, words);
 #endif
   }
 }
@@ -2884,10 +2884,11 @@
     a[0] = 1;
     b[0] = 42;
 
-    bn_mul_mont(r.data(), a.data(), b.data(), mont->N.d, mont->n0, words);
+    bn_mul_mont_words(r.data(), a.data(), b.data(), mont->N.d, mont->n0, words);
     CHECK_ABI(bn_scatter5, r.data(), words, table.data(), 13);
     for (size_t i = 0; i < 32; i++) {
-      bn_mul_mont(r.data(), a.data(), b.data(), mont->N.d, mont->n0, words);
+      bn_mul_mont_words(r.data(), a.data(), b.data(), mont->N.d, mont->n0,
+                        words);
       bn_scatter5(r.data(), words, table.data(), i);
     }
     CHECK_ABI(bn_gather5, r.data(), words, table.data(), 13);
diff --git a/crypto/fipsmodule/bn/exponentiation.cc.inc b/crypto/fipsmodule/bn/exponentiation.cc.inc
index 205f4b4..a6c7d55 100644
--- a/crypto/fipsmodule/bn/exponentiation.cc.inc
+++ b/crypto/fipsmodule/bn/exponentiation.cc.inc
@@ -560,9 +560,9 @@
   // |bn_mul_mont_gather5| and |bn_power5| implement the "almost" reduction
   // variant, so the values here may not be fully reduced. They are bounded by R
   // (i.e. they fit in |top| words), not |m|. Additionally, we pass these
-  // "almost" reduced inputs into |bn_mul_mont|, which implements the normal
-  // reduction variant. Given those inputs, |bn_mul_mont| may not give reduced
-  // output, but it will still produce "almost" reduced output.
+  // "almost" reduced inputs into |bn_mul_mont_words|, which implements the
+  // normal reduction variant. Given those inputs, |bn_mul_mont_words| may not
+  // give reduced output, but it will still produce "almost" reduced output.
   //
   // TODO(davidben): Using "almost" reduction complicates analysis of this code,
   // and its interaction with other parts of the project. Determine whether this
@@ -578,12 +578,12 @@
     const BN_ULONG *n0 = mont->n0;
     bn_scatter5(tmp.d, top, powerbuf, 0);
     bn_scatter5(am.d, am.width, powerbuf, 1);
-    bn_mul_mont(tmp.d, am.d, am.d, np, n0, top);
+    bn_mul_mont_words(tmp.d, am.d, am.d, np, n0, top);
     bn_scatter5(tmp.d, top, powerbuf, 2);
 
     // Square to compute powers of two.
     for (i = 4; i < 32; i *= 2) {
-      bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
+      bn_mul_mont_words(tmp.d, tmp.d, tmp.d, np, n0, top);
       bn_scatter5(tmp.d, top, powerbuf, i);
     }
     // Compute odd powers |i| based on |i - 1|, then all powers |i * 2^j|.
@@ -591,7 +591,7 @@
       bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
       bn_scatter5(tmp.d, top, powerbuf, i);
       for (int j = 2 * i; j < 32; j *= 2) {
-        bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
+        bn_mul_mont_words(tmp.d, tmp.d, tmp.d, np, n0, top);
         bn_scatter5(tmp.d, top, powerbuf, j);
       }
     }
@@ -614,11 +614,11 @@
           wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
         }
 
-        bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
-        bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
-        bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
-        bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
-        bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
+        bn_mul_mont_words(tmp.d, tmp.d, tmp.d, np, n0, top);
+        bn_mul_mont_words(tmp.d, tmp.d, tmp.d, np, n0, top);
+        bn_mul_mont_words(tmp.d, tmp.d, tmp.d, np, n0, top);
+        bn_mul_mont_words(tmp.d, tmp.d, tmp.d, np, n0, top);
+        bn_mul_mont_words(tmp.d, tmp.d, tmp.d, np, n0, top);
         bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue);
       }
     } else {
diff --git a/crypto/fipsmodule/bn/internal.h b/crypto/fipsmodule/bn/internal.h
index ee881a9..3619ddd 100644
--- a/crypto/fipsmodule/bn/internal.h
+++ b/crypto/fipsmodule/bn/internal.h
@@ -264,9 +264,9 @@
 // values for other operations.
 //
 // This limit is set so that one number fits within 1 KiB, giving room to
-// allocate a few of them on the stack in |bn_mul_mont| without exceeding a page
-// (4 KiB). It is also set to limit the DoS impact of large RSA, DH, and DSA
-// keys, which scale cubicly.
+// allocate a few of them on the stack in |bn_mul_mont_words| without exceeding
+// a page (4 KiB). It is also set to limit the DoS impact of large RSA, DH, and
+// DSA keys, which scale cubically.
 #define BN_MONTGOMERY_MAX_WORDS (8192 / BN_BITS2)
 
 struct bn_mont_ctx_st {
@@ -283,7 +283,7 @@
     (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \
      defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64))
 #define OPENSSL_BN_ASM_MONT
-// bn_mul_mont writes |ap| * |bp| mod |np| to |rp|, each |num| words
+// bn_mul_mont_words writes |ap| * |bp| mod |np| to |rp|, each |num| words
 // long. Inputs and outputs are in Montgomery form. |n0| is a pointer to the
 // corresponding field in |BN_MONT_CTX|.
 //
@@ -301,9 +301,9 @@
 //
 // See also discussion in |ToWord| in abi_test.h for notes on smaller-than-word
 // inputs.
-void bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
-                 const BN_ULONG *np, const BN_ULONG n0[BN_MONT_CTX_N0_LIMBS],
-                 size_t num);
+void bn_mul_mont_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
+                       const BN_ULONG *np,
+                       const BN_ULONG n0[BN_MONT_CTX_N0_LIMBS], size_t num);
 
 #if defined(OPENSSL_X86_64)
 inline int bn_mulx_adx_capable(void) {
diff --git a/crypto/fipsmodule/bn/montgomery.cc.inc b/crypto/fipsmodule/bn/montgomery.cc.inc
index fe5c3a2..6281b29 100644
--- a/crypto/fipsmodule/bn/montgomery.cc.inc
+++ b/crypto/fipsmodule/bn/montgomery.cc.inc
@@ -308,16 +308,16 @@
   }
 
 #if defined(OPENSSL_BN_ASM_MONT)
-  // |bn_mul_mont| requires at least 128 bits of limbs.
+  // |bn_mul_mont_words| requires at least 128 bits of limbs.
   int num = mont->N.width;
   if (num >= (128 / BN_BITS2) && a->width == num && b->width == num) {
     if (!bn_wexpand(r, num)) {
       return 0;
     }
-    // This bound is implied by |bn_mont_ctx_set_N_and_n0|. |bn_mul_mont|
+    // This bound is implied by |bn_mont_ctx_set_N_and_n0|. |bn_mul_mont_words|
     // allocates |num| words on the stack, so |num| cannot be too large.
     assert((size_t)num <= BN_MONTGOMERY_MAX_WORDS);
-    bn_mul_mont(r->d, a->d, b->d, mont->N.d, mont->n0, num);
+    bn_mul_mont_words(r->d, a->d, b->d, mont->N.d, mont->n0, num);
     r->neg = 0;
     r->width = num;
     return 1;
@@ -358,9 +358,9 @@
   }
 
 #if defined(OPENSSL_BN_ASM_MONT)
-  // |bn_mul_mont| requires at least 128 bits of limbs.
+  // |bn_mul_mont_words| requires at least 128 bits of limbs.
   if (num >= (128 / BN_BITS2)) {
-    bn_mul_mont(r, a, b, mont->N.d, mont->n0, num);
+    bn_mul_mont_words(r, a, b, mont->N.d, mont->n0, num);
     return;
   }
 #endif
@@ -381,8 +381,8 @@
 }
 
 #if defined(OPENSSL_BN_ASM_MONT) && defined(OPENSSL_X86_64)
-void bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
-                const BN_ULONG *np, const BN_ULONG *n0, size_t num) {
+void bn_mul_mont_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
+                       const BN_ULONG *np, const BN_ULONG *n0, size_t num) {
   if (ap == bp && bn_sqr8x_mont_capable(num)) {
     bn_sqr8x_mont(rp, ap, bn_mulx_adx_capable(), np, n0, num);
   } else if (bn_mulx4x_mont_capable(num)) {
@@ -396,8 +396,8 @@
 #endif
 
 #if defined(OPENSSL_BN_ASM_MONT) && defined(OPENSSL_ARM)
-void bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
-                 const BN_ULONG *np, const BN_ULONG *n0, size_t num) {
+void bn_mul_mont_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
+                       const BN_ULONG *np, const BN_ULONG *n0, size_t num) {
   if (bn_mul8x_mont_neon_capable(num)) {
     bn_mul8x_mont_neon(rp, ap, bp, np, n0, num);
   } else {
diff --git a/gen/bcm/armv8-mont-apple.S b/gen/bcm/armv8-mont-apple.S
index 6aad968..fadf3e2 100644
--- a/gen/bcm/armv8-mont-apple.S
+++ b/gen/bcm/armv8-mont-apple.S
@@ -6,11 +6,11 @@
 #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__APPLE__)
 .text
 
-.globl	_bn_mul_mont
-.private_extern	_bn_mul_mont
+.globl	_bn_mul_mont_words
+.private_extern	_bn_mul_mont_words
 
 .align	5
-_bn_mul_mont:
+_bn_mul_mont_words:
 	AARCH64_SIGN_LINK_REGISTER
 	tst	x5,#7
 	b.eq	__bn_sqr8x_mont
@@ -216,7 +216,7 @@
 .align	5
 __bn_sqr8x_mont:
 	// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_sqr8x_mont is jumped to
-	// only from bn_mul_mont which has already signed the return address.
+	// only from bn_mul_mont_words which has already signed the return address.
 	cmp	x1,x2
 	b.ne	__bn_mul4x_mont
 Lsqr8x_mont:
@@ -978,7 +978,7 @@
 .align	5
 __bn_mul4x_mont:
 	// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_mul4x_mont is jumped to
-	// only from bn_mul_mont or __bn_mul8x_mont which have already signed the
+	// only from bn_mul_mont_words or __bn_mul8x_mont which have already signed the
 	// return address.
 	stp	x29,x30,[sp,#-128]!
 	add	x29,sp,#0
diff --git a/gen/bcm/armv8-mont-linux.S b/gen/bcm/armv8-mont-linux.S
index e49322b..574647f 100644
--- a/gen/bcm/armv8-mont-linux.S
+++ b/gen/bcm/armv8-mont-linux.S
@@ -6,11 +6,11 @@
 #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__ELF__)
 .text
 
-.globl	bn_mul_mont
-.hidden	bn_mul_mont
-.type	bn_mul_mont,%function
+.globl	bn_mul_mont_words
+.hidden	bn_mul_mont_words
+.type	bn_mul_mont_words,%function
 .align	5
-bn_mul_mont:
+bn_mul_mont_words:
 	AARCH64_SIGN_LINK_REGISTER
 	tst	x5,#7
 	b.eq	__bn_sqr8x_mont
@@ -211,12 +211,12 @@
 	ldr	x29,[sp],#64
 	AARCH64_VALIDATE_LINK_REGISTER
 	ret
-.size	bn_mul_mont,.-bn_mul_mont
+.size	bn_mul_mont_words,.-bn_mul_mont_words
 .type	__bn_sqr8x_mont,%function
 .align	5
 __bn_sqr8x_mont:
 	// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_sqr8x_mont is jumped to
-	// only from bn_mul_mont which has already signed the return address.
+	// only from bn_mul_mont_words which has already signed the return address.
 	cmp	x1,x2
 	b.ne	__bn_mul4x_mont
 .Lsqr8x_mont:
@@ -978,7 +978,7 @@
 .align	5
 __bn_mul4x_mont:
 	// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_mul4x_mont is jumped to
-	// only from bn_mul_mont or __bn_mul8x_mont which have already signed the
+	// only from bn_mul_mont_words or __bn_mul8x_mont which have already signed the
 	// return address.
 	stp	x29,x30,[sp,#-128]!
 	add	x29,sp,#0
diff --git a/gen/bcm/armv8-mont-win.S b/gen/bcm/armv8-mont-win.S
index 4091a5a..84f3473 100644
--- a/gen/bcm/armv8-mont-win.S
+++ b/gen/bcm/armv8-mont-win.S
@@ -6,13 +6,13 @@
 #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(_WIN32)
 .text
 
-.globl	bn_mul_mont
+.globl	bn_mul_mont_words
 
-.def bn_mul_mont
+.def bn_mul_mont_words
    .type 32
 .endef
 .align	5
-bn_mul_mont:
+bn_mul_mont_words:
 	AARCH64_SIGN_LINK_REGISTER
 	tst	x5,#7
 	b.eq	__bn_sqr8x_mont
@@ -220,7 +220,7 @@
 .align	5
 __bn_sqr8x_mont:
 	// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_sqr8x_mont is jumped to
-	// only from bn_mul_mont which has already signed the return address.
+	// only from bn_mul_mont_words which has already signed the return address.
 	cmp	x1,x2
 	b.ne	__bn_mul4x_mont
 Lsqr8x_mont:
@@ -984,7 +984,7 @@
 .align	5
 __bn_mul4x_mont:
 	// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_mul4x_mont is jumped to
-	// only from bn_mul_mont or __bn_mul8x_mont which have already signed the
+	// only from bn_mul_mont_words or __bn_mul8x_mont which have already signed the
 	// return address.
 	stp	x29,x30,[sp,#-128]!
 	add	x29,sp,#0
diff --git a/gen/bcm/x86-mont-apple.S b/gen/bcm/x86-mont-apple.S
index 6e549c7..89e439b 100644
--- a/gen/bcm/x86-mont-apple.S
+++ b/gen/bcm/x86-mont-apple.S
@@ -5,11 +5,11 @@
 
 #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)
 .text
-.globl	_bn_mul_mont
-.private_extern	_bn_mul_mont
+.globl	_bn_mul_mont_words
+.private_extern	_bn_mul_mont_words
 .align	4
-_bn_mul_mont:
-L_bn_mul_mont_begin:
+_bn_mul_mont_words:
+L_bn_mul_mont_words_begin:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%esi
diff --git a/gen/bcm/x86-mont-linux.S b/gen/bcm/x86-mont-linux.S
index 21fbee2..edf98a2 100644
--- a/gen/bcm/x86-mont-linux.S
+++ b/gen/bcm/x86-mont-linux.S
@@ -5,12 +5,12 @@
 
 #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)
 .text
-.globl	bn_mul_mont
-.hidden	bn_mul_mont
-.type	bn_mul_mont,@function
+.globl	bn_mul_mont_words
+.hidden	bn_mul_mont_words
+.type	bn_mul_mont_words,@function
 .align	16
-bn_mul_mont:
-.L_bn_mul_mont_begin:
+bn_mul_mont_words:
+.L_bn_mul_mont_words_begin:
 	pushl	%ebp
 	pushl	%ebx
 	pushl	%esi
@@ -209,7 +209,7 @@
 	popl	%ebx
 	popl	%ebp
 	ret
-.size	bn_mul_mont,.-.L_bn_mul_mont_begin
+.size	bn_mul_mont_words,.-.L_bn_mul_mont_words_begin
 .byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
 .byte	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
 .byte	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
diff --git a/gen/bcm/x86-mont-win.asm b/gen/bcm/x86-mont-win.asm
index d154078..f9d12bb 100644
--- a/gen/bcm/x86-mont-win.asm
+++ b/gen/bcm/x86-mont-win.asm
@@ -13,10 +13,10 @@
 %else
 section	.text	code
 %endif
-global	_bn_mul_mont
+global	_bn_mul_mont_words
 align	16
-_bn_mul_mont:
-L$_bn_mul_mont_begin:
+_bn_mul_mont_words:
+L$_bn_mul_mont_words_begin:
 	push	ebp
 	push	ebx
 	push	esi