Unwind RDRAND functions correctly on Windows.

But for the ABI conversion bits, these are just leaf functions and don't
even need unwind tables. Just renumber the registers on Windows to only
use volatile ones.

In doing so, this switches to writing rdrand explicitly. perlasm already
knows how to manually encode it and our minimum assembler versions
surely cover rdrand by now anyway. Also add the .size directive. I'm not
sure what it's used for, but the other files have it.

(This isn't a generally reusable technique. The more complex functions
will need actual unwind codes.)

Bug: 259
Change-Id: I1d5669bcf8b6e34939885d78aea6f60597be1528
Reviewed-on: https://boringssl-review.googlesource.com/c/34867
Commit-Queue: Adam Langley <agl@google.com>
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/fipsmodule/rand/asm/rdrand-x86_64.pl b/crypto/fipsmodule/rand/asm/rdrand-x86_64.pl
index 056dd74..76b5f9b 100644
--- a/crypto/fipsmodule/rand/asm/rdrand-x86_64.pl
+++ b/crypto/fipsmodule/rand/asm/rdrand-x86_64.pl
@@ -14,17 +14,27 @@
 # OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
 # CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
 
-$flavour = shift;
-$output  = shift;
+use strict;
+
+my $flavour = shift;
+my $output  = shift;
 if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
 
-$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+my $win64 = 0;
+$win64 = 1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/;
+my $dir = $1;
+my $xlate;
 ( $xlate="${dir}../../../perlasm/x86_64-xlate.pl" and -f $xlate) or
 die "can't locate x86_64-xlate.pl";
 
 open OUT,"| \"$^X\" $xlate $flavour $output";
 *STDOUT=*OUT;
 
+my ($out, $len, $tmp1, $tmp2) = $win64 ? ("%rcx", "%rdx", "%r8", "%r9")
+                                       : ("%rdi", "%rsi", "%rdx", "%rcx");
+
 print<<___;
 .text
 
@@ -32,40 +42,37 @@
 # |out|. It returns one on success or zero on hardware failure.
 # int CRYPTO_rdrand(uint8_t out[8]);
 .globl	CRYPTO_rdrand
-.type	CRYPTO_rdrand,\@function,1
+.type	CRYPTO_rdrand,\@abi-omnipotent
 .align	16
 CRYPTO_rdrand:
 .cfi_startproc
 	xorq %rax, %rax
-	# This is rdrand %rcx. It sets rcx to a random value and sets the carry
-	# flag on success.
-	.byte 0x48, 0x0f, 0xc7, 0xf1
+	rdrand $tmp1
 	# An add-with-carry of zero effectively sets %rax to the carry flag.
 	adcq %rax, %rax
-	movq %rcx, 0(%rdi)
+	movq $tmp1, 0($out)
 	retq
 .cfi_endproc
+.size CRYPTO_rdrand,.-CRYPTO_rdrand
 
 # CRYPTO_rdrand_multiple8_buf fills |len| bytes at |buf| with random data from
 # the hardware RNG. The |len| argument must be a multiple of eight. It returns
 # one on success and zero on hardware failure.
 # int CRYPTO_rdrand_multiple8_buf(uint8_t *buf, size_t len);
 .globl CRYPTO_rdrand_multiple8_buf
-.type CRYPTO_rdrand_multiple8_buf,\@function,2
+.type CRYPTO_rdrand_multiple8_buf,\@abi-omnipotent
 .align 16
 CRYPTO_rdrand_multiple8_buf:
 .cfi_startproc
-	test %rsi, %rsi
+	test $len, $len
 	jz .Lout
-	movq \$8, %rdx
+	movq \$8, $tmp1
 .Lloop:
-	# This is rdrand %rcx. It sets rcx to a random value and sets the carry
-	# flag on success.
-	.byte 0x48, 0x0f, 0xc7, 0xf1
+	rdrand $tmp2
 	jnc .Lerr
-	movq %rcx, 0(%rdi)
-	addq %rdx, %rdi
-	subq %rdx, %rsi
+	movq $tmp2, 0($out)
+	addq $tmp1, $out
+	subq $tmp1, $len
 	jnz .Lloop
 .Lout:
 	movq \$1, %rax
@@ -74,6 +81,7 @@
 	xorq %rax, %rax
 	retq
 .cfi_endproc
+.size CRYPTO_rdrand_multiple8_buf,.-CRYPTO_rdrand_multiple8_buf
 ___
 
 close STDOUT;	# flush
diff --git a/crypto/fipsmodule/rand/internal.h b/crypto/fipsmodule/rand/internal.h
index f73f4a1..ad75823 100644
--- a/crypto/fipsmodule/rand/internal.h
+++ b/crypto/fipsmodule/rand/internal.h
@@ -16,6 +16,7 @@
 #define OPENSSL_HEADER_CRYPTO_RAND_INTERNAL_H
 
 #include <openssl/aes.h>
+#include <openssl/cpu.h>
 
 #include "../../internal.h"
 #include "../modes/internal.h"
@@ -85,6 +86,22 @@
 OPENSSL_EXPORT void CTR_DRBG_clear(CTR_DRBG_STATE *drbg);
 
 
+#if defined(OPENSSL_X86_64) && !defined(OPENSSL_NO_ASM)
+OPENSSL_INLINE int have_rdrand(void) {
+  return (OPENSSL_ia32cap_get()[1] & (1u << 30)) != 0;
+}
+
+// CRYPTO_rdrand writes eight bytes of random data from the hardware RNG to
+// |out|. It returns one on success or zero on hardware failure.
+int CRYPTO_rdrand(uint8_t out[8]);
+
+// CRYPTO_rdrand_multiple8_buf fills |len| bytes at |buf| with random data from
+// the hardware RNG. The |len| argument must be a multiple of eight. It returns
+// one on success and zero on hardware failure.
+int CRYPTO_rdrand_multiple8_buf(uint8_t *buf, size_t len);
+#endif  // OPENSSL_X86_64 && !OPENSSL_NO_ASM
+
+
 #if defined(__cplusplus)
 }  // extern C
 #endif
diff --git a/crypto/fipsmodule/rand/rand.c b/crypto/fipsmodule/rand/rand.c
index 4128033..a8ef458 100644
--- a/crypto/fipsmodule/rand/rand.c
+++ b/crypto/fipsmodule/rand/rand.c
@@ -125,15 +125,6 @@
 
 #if defined(OPENSSL_X86_64) && !defined(OPENSSL_NO_ASM) && \
     !defined(BORINGSSL_UNSAFE_DETERMINISTIC_MODE)
-
-// These functions are defined in asm/rdrand-x86_64.pl
-extern int CRYPTO_rdrand(uint8_t out[8]);
-extern int CRYPTO_rdrand_multiple8_buf(uint8_t *buf, size_t len);
-
-static int have_rdrand(void) {
-  return (OPENSSL_ia32cap_get()[1] & (1u << 30)) != 0;
-}
-
 static int hwrand(uint8_t *buf, const size_t len) {
   if (!have_rdrand()) {
     return 0;
diff --git a/crypto/rand_extra/rand_test.cc b/crypto/rand_extra/rand_test.cc
index 9bc5d97..9c69b8f 100644
--- a/crypto/rand_extra/rand_test.cc
+++ b/crypto/rand_extra/rand_test.cc
@@ -21,6 +21,7 @@
 #include <openssl/cpu.h>
 #include <openssl/span.h>
 
+#include "../fipsmodule/rand/internal.h"
 #include "../test/abi_test.h"
 #include "../test/test_util.h"
 
@@ -189,23 +190,18 @@
 #endif  // OPENSSL_THREADS
 
 #if defined(OPENSSL_X86_64) && defined(SUPPORTS_ABI_TEST)
-extern "C" {
-int CRYPTO_rdrand(uint8_t out[8]);
-int CRYPTO_rdrand_multiple8_buf(uint8_t *buf, size_t len);
-}  // extern "C"
-
 TEST(RandTest, RdrandABI) {
-  if ((OPENSSL_ia32cap_P[1] & (1u << 30)) == 0) {
+  if (!have_rdrand()) {
     fprintf(stderr, "rdrand not supported. Skipping.\n");
     return;
   }
 
   uint8_t buf[32];
-  CHECK_ABI(CRYPTO_rdrand, buf);
-  CHECK_ABI(CRYPTO_rdrand_multiple8_buf, nullptr, 0);
-  CHECK_ABI(CRYPTO_rdrand_multiple8_buf, buf, 8);
-  CHECK_ABI(CRYPTO_rdrand_multiple8_buf, buf, 16);
-  CHECK_ABI(CRYPTO_rdrand_multiple8_buf, buf, 24);
-  CHECK_ABI(CRYPTO_rdrand_multiple8_buf, buf, 32);
+  CHECK_ABI_SEH(CRYPTO_rdrand, buf);
+  CHECK_ABI_SEH(CRYPTO_rdrand_multiple8_buf, nullptr, 0);
+  CHECK_ABI_SEH(CRYPTO_rdrand_multiple8_buf, buf, 8);
+  CHECK_ABI_SEH(CRYPTO_rdrand_multiple8_buf, buf, 16);
+  CHECK_ABI_SEH(CRYPTO_rdrand_multiple8_buf, buf, 24);
+  CHECK_ABI_SEH(CRYPTO_rdrand_multiple8_buf, buf, 32);
 }
 #endif  // OPENSSL_X86_64 && SUPPORTS_ABI_TEST