Fix ABI error in bn_mul_mont on aarch64.

This was caught by an aarch64 ABI tester. aarch64 has the same
considerations around small arguments as x86_64 does. The aarch64
version of bn_mul_mont does not mask off the upper words of the
argument.

The x86_64 version does, so size_t is, strictly speaking, wrong for
aarch64, but bn_mul_mont already has an implicit size limit to support
its internal alloca, so this doesn't really make things worse than
before.

Change-Id: I39bffc8fdb2287e45a2d1f0d1b4bd5532bbf3868
Reviewed-on: https://boringssl-review.googlesource.com/c/34804
Commit-Queue: David Benjamin <davidben@google.com>
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/fipsmodule/bn/asm/armv8-mont.pl b/crypto/fipsmodule/bn/asm/armv8-mont.pl
index acad064..aab9eaa 100644
--- a/crypto/fipsmodule/bn/asm/armv8-mont.pl
+++ b/crypto/fipsmodule/bn/asm/armv8-mont.pl
@@ -61,7 +61,7 @@
 $bp="x2";	# const BN_ULONG *bp,
 $np="x3";	# const BN_ULONG *np,
 $n0="x4";	# const BN_ULONG *n0,
-$num="x5";	# int num);
+$num="x5";	# size_t num);
 
 $code.=<<___;
 .text
diff --git a/crypto/fipsmodule/bn/asm/x86_64-mont.pl b/crypto/fipsmodule/bn/asm/x86_64-mont.pl
index 023143a..3d98e72 100755
--- a/crypto/fipsmodule/bn/asm/x86_64-mont.pl
+++ b/crypto/fipsmodule/bn/asm/x86_64-mont.pl
@@ -71,7 +71,9 @@
 $bp="%rdx";	# const BN_ULONG *bp,
 $np="%rcx";	# const BN_ULONG *np,
 $n0="%r8";	# const BN_ULONG *n0,
-$num="%r9";	# int num);
+# TODO(davidben): The code below treats $num as an int, but C passes in a
+# size_t.
+$num="%r9";	# size_t num);
 $lo0="%r10";
 $hi0="%r11";
 $hi1="%r13";
diff --git a/crypto/fipsmodule/bn/internal.h b/crypto/fipsmodule/bn/internal.h
index 0bcc031..752f0dc 100644
--- a/crypto/fipsmodule/bn/internal.h
+++ b/crypto/fipsmodule/bn/internal.h
@@ -340,8 +340,21 @@
     (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \
      defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64))
 #define OPENSSL_BN_ASM_MONT
+// bn_mul_mont writes |ap| * |bp| mod |np| to |rp|, each |num| words
+// long. Inputs and outputs are in Montgomery form. |n0| is a pointer to the
+// corresponding field in |BN_MONT_CTX|. It returns one if |bn_mul_mont| handles
+// inputs of this size and zero otherwise.
+//
+// TODO(davidben): The x86_64 implementation expects a 32-bit input and masks
+// off upper bits. The aarch64 implementation expects a 64-bit input and does
+// not. |size_t| is the safer option but not strictly correct for x86_64. But
+// this function implicitly already has a bound on the size of |num| because it
+// internally creates |num|-sized stack allocation.
+//
+// See also discussion in |ToWord| in abi_test.h for notes on smaller-than-word
+// inputs.
 int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
-                const BN_ULONG *np, const BN_ULONG *n0, int num);
+                const BN_ULONG *np, const BN_ULONG *n0, size_t num);
 #endif
 
 uint64_t bn_mont_n0(const BIGNUM *n);