Implement unwind testing for Windows.

Unfortunately, due to most OpenSSL assembly using custom exception
handlers to unwind, most of our assembly doesn't work with
non-destructive unwind. For now, CHECK_ABI behaves like
CHECK_ABI_NO_UNWIND on Windows, and CHECK_ABI_SEH will test unwinding on
both platforms.

The tests do, however, work with the unwind-code-based assembly we
recently added, as well as the clmul-based GHASH which is also
code-based. Remove the ad-hoc SEH tests which intentionally hit memory
access exceptions, now that we can test unwind directly.

Now that we can test it, the next step is to implement SEH directives in
perlasm so writing these unwind codes is less of a chore.

Bug: 259
Change-Id: I23a57a22c5dc9fa4513f575f18192335779678a5
Reviewed-on: https://boringssl-review.googlesource.com/c/34784
Commit-Queue: David Benjamin <davidben@google.com>
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/abi_self_test.cc b/crypto/abi_self_test.cc
index 5352b80..c5bace1 100644
--- a/crypto/abi_self_test.cc
+++ b/crypto/abi_self_test.cc
@@ -19,10 +19,6 @@
 
 #include "test/abi_test.h"
 
-#if defined(OPENSSL_WINDOWS)
-#include <windows.h>
-#endif
-
 
 static bool test_function_ok;
 static int TestFunction(int a1, int a2, int a3, int a4, int a5, int a6, int a7,
@@ -36,7 +32,7 @@
   EXPECT_NE(0, CHECK_ABI_NO_UNWIND(strcmp, "hello", "world"));
 
   test_function_ok = false;
-  EXPECT_EQ(42, CHECK_ABI(TestFunction, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10));
+  EXPECT_EQ(42, CHECK_ABI_SEH(TestFunction, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10));
   EXPECT_TRUE(test_function_ok);
 
 #if defined(SUPPORTS_ABI_TEST)
@@ -45,18 +41,28 @@
   crypto_word_t argv[] = {
       1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
   };
-  CHECK_ABI(abi_test_trampoline, reinterpret_cast<crypto_word_t>(TestFunction),
-            &state, argv, 10, 0 /* no breakpoint */);
+  CHECK_ABI_SEH(abi_test_trampoline,
+                reinterpret_cast<crypto_word_t>(TestFunction), &state, argv, 10,
+                0 /* no breakpoint */);
 
 #if defined(OPENSSL_X86_64)
   if (abi_test::UnwindTestsEnabled()) {
-    EXPECT_NONFATAL_FAILURE(CHECK_ABI(abi_test_bad_unwind_wrong_register),
+    EXPECT_NONFATAL_FAILURE(CHECK_ABI_SEH(abi_test_bad_unwind_wrong_register),
                             "was not recovered unwinding");
-    EXPECT_NONFATAL_FAILURE(CHECK_ABI(abi_test_bad_unwind_temporary),
+    EXPECT_NONFATAL_FAILURE(CHECK_ABI_SEH(abi_test_bad_unwind_temporary),
                             "was not recovered unwinding");
 
     CHECK_ABI_NO_UNWIND(abi_test_bad_unwind_wrong_register);
     CHECK_ABI_NO_UNWIND(abi_test_bad_unwind_temporary);
+
+#if defined(OPENSSL_WINDOWS)
+    // The invalid epilog makes Windows believe the epilog starts later than it
+    // actually does. As a result, immediately after the popq, it does not
+    // realize the stack has been unwound and repeats the work.
+    EXPECT_NONFATAL_FAILURE(CHECK_ABI_SEH(abi_test_bad_unwind_epilog),
+                            "unwound past starting frame");
+    CHECK_ABI_NO_UNWIND(abi_test_bad_unwind_epilog);
+#endif  // OPENSSL_WINDOWS
   }
 #endif  // OPENSSL_X86_64
 #endif  // SUPPORTS_ABI_TEST
@@ -180,33 +186,6 @@
   EXPECT_EQ(0, abi_test_get_and_clear_direction_flag())
       << "CHECK_ABI did not insulate the caller from direction flag errors";
 }
-
-#if defined(OPENSSL_WINDOWS)
-static void ThrowWindowsException() {
-  DebugBreak();
-}
-
-static void ExceptionTest() {
-  bool handled = false;
-  __try {
-    CHECK_ABI_NO_UNWIND(ThrowWindowsException);
-  } __except (GetExceptionCode() == EXCEPTION_BREAKPOINT
-                  ? EXCEPTION_EXECUTE_HANDLER
-                  : EXCEPTION_CONTINUE_SEARCH) {
-    handled = true;
-  }
-
-  EXPECT_TRUE(handled);
-}
-
-// Test that the trampoline's SEH metadata works.
-TEST(ABITest, TrampolineSEH) {
-  // Wrap the test in |CHECK_ABI|, to confirm the register-restoring annotations
-  // were correct.
-  CHECK_ABI_NO_UNWIND(ExceptionTest);
-}
-#endif  // OPENSSL_WINDOWS
-
 #endif   // OPENSSL_X86_64 && SUPPORTS_ABI_TEST
 
 #if defined(OPENSSL_X86) && defined(SUPPORTS_ABI_TEST)
diff --git a/crypto/fipsmodule/modes/gcm_test.cc b/crypto/fipsmodule/modes/gcm_test.cc
index c1b2c9f..9283cd2 100644
--- a/crypto/fipsmodule/modes/gcm_test.cc
+++ b/crypto/fipsmodule/modes/gcm_test.cc
@@ -61,12 +61,6 @@
 #include "../../test/file_test.h"
 #include "../../test/test_util.h"
 
-#if defined(OPENSSL_WINDOWS)
-OPENSSL_MSVC_PRAGMA(warning(push, 3))
-#include <windows.h>
-OPENSSL_MSVC_PRAGMA(warning(pop))
-#endif
-
 
 TEST(GCMTest, TestVectors) {
   FileTestGTest("crypto/fipsmodule/modes/gcm_tests.txt", [](FileTest *t) {
@@ -155,66 +149,32 @@
 
 #if defined(GHASH_ASM_X86_64)
   if (gcm_ssse3_capable()) {
-    CHECK_ABI(gcm_init_ssse3, Htable, kH);
-    CHECK_ABI(gcm_gmult_ssse3, X, Htable);
+    CHECK_ABI_SEH(gcm_init_ssse3, Htable, kH);
+    CHECK_ABI_SEH(gcm_gmult_ssse3, X, Htable);
     for (size_t blocks : kBlockCounts) {
-      CHECK_ABI(gcm_ghash_ssse3, X, Htable, buf, 16 * blocks);
+      CHECK_ABI_SEH(gcm_ghash_ssse3, X, Htable, buf, 16 * blocks);
     }
   }
 #endif  // GHASH_ASM_X86_64
 
 #if defined(GHASH_ASM_X86) || defined(GHASH_ASM_X86_64)
   if (crypto_gcm_clmul_enabled()) {
-    CHECK_ABI(gcm_init_clmul, Htable, kH);
-    CHECK_ABI(gcm_gmult_clmul, X, Htable);
+    CHECK_ABI_SEH(gcm_init_clmul, Htable, kH);
+    CHECK_ABI_SEH(gcm_gmult_clmul, X, Htable);
     for (size_t blocks : kBlockCounts) {
-      CHECK_ABI(gcm_ghash_clmul, X, Htable, buf, 16 * blocks);
+      CHECK_ABI_SEH(gcm_ghash_clmul, X, Htable, buf, 16 * blocks);
     }
 
 #if defined(GHASH_ASM_X86_64)
     if (((OPENSSL_ia32cap_get()[1] >> 22) & 0x41) == 0x41) {  // AVX+MOVBE
-      CHECK_ABI(gcm_init_avx, Htable, kH);
-      CHECK_ABI(gcm_gmult_avx, X, Htable);
+      CHECK_ABI_SEH(gcm_init_avx, Htable, kH);
+      CHECK_ABI_SEH(gcm_gmult_avx, X, Htable);
       for (size_t blocks : kBlockCounts) {
-        CHECK_ABI(gcm_ghash_avx, X, Htable, buf, 16 * blocks);
+        CHECK_ABI_SEH(gcm_ghash_avx, X, Htable, buf, 16 * blocks);
       }
     }
 #endif  // GHASH_ASM_X86_64
   }
 #endif  // GHASH_ASM_X86 || GHASH_ASM_X86_64
 }
-
-#if defined(OPENSSL_WINDOWS) && defined(GHASH_ASM_X86_64)
-// Sanity-check the SEH unwind codes in ghash-ssse3-x86_64.pl.
-// TODO(davidben): Implement unwind testing for SEH and remove this.
-static void GCMSSSE3ExceptionTest() {
-  if (!gcm_ssse3_capable()) {
-    return;
-  }
-
-  bool handled = false;
-  __try {
-    gcm_gmult_ssse3(nullptr, nullptr);
-  } __except (GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION
-                  ? EXCEPTION_EXECUTE_HANDLER
-                  : EXCEPTION_CONTINUE_SEARCH) {
-    handled = true;
-  }
-  EXPECT_TRUE(handled);
-
-  handled = false;
-  __try {
-    gcm_ghash_ssse3(nullptr, nullptr, nullptr, 16);
-  } __except (GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION
-                  ? EXCEPTION_EXECUTE_HANDLER
-                  : EXCEPTION_CONTINUE_SEARCH) {
-    handled = true;
-  }
-  EXPECT_TRUE(handled);
-}
-
-TEST(GCMTest, SEH) {
-  CHECK_ABI_NO_UNWIND(GCMSSSE3ExceptionTest);
-}
-#endif  // OPENSSL_WINDOWS && GHASH_ASM_X86_64
 #endif  // SUPPORTS_ABI_TEST && GHASH_ASM
diff --git a/crypto/test/abi_test.cc b/crypto/test/abi_test.cc
index 6530ddf..39dd2be 100644
--- a/crypto/test/abi_test.cc
+++ b/crypto/test/abi_test.cc
@@ -25,10 +25,9 @@
 #include <openssl/rand.h>
 #include <openssl/span.h>
 
-#if defined(OPENSSL_LINUX) && defined(SUPPORTS_ABI_TEST) && \
-    defined(BORINGSSL_HAVE_LIBUNWIND)
-#define UNWIND_TEST_SIGTRAP
-
+#if defined(OPENSSL_X86_64) && defined(SUPPORTS_ABI_TEST)
+#if defined(OPENSSL_LINUX) && defined(BORINGSSL_HAVE_LIBUNWIND)
+#define SUPPORTS_UNWIND_TEST
 #define UNW_LOCAL_ONLY
 #include <errno.h>
 #include <fcntl.h>
@@ -40,7 +39,13 @@
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <unistd.h>
-#endif  // LINUX && SUPPORTS_ABI_TEST && HAVE_LIBUNWIND
+#elif defined(OPENSSL_WINDOWS)
+#define SUPPORTS_UNWIND_TEST
+OPENSSL_MSVC_PRAGMA(warning(push, 3))
+#include <windows.h>
+OPENSSL_MSVC_PRAGMA(warning(pop))
+#endif
+#endif  // X86_64 && SUPPORTS_ABI_TEST
 
 
 namespace abi_test {
@@ -116,11 +121,523 @@
 }
 #endif  // SUPPORTS_ABI_TEST
 
-#if defined(UNWIND_TEST_SIGTRAP)
-// On Linux, we test unwind metadata using libunwind and |SIGTRAP|. We run the
-// function under test with the trap flag set. This results in |SIGTRAP|s on
-// every instruction. We then handle these signals and verify with libunwind.
+#if defined(SUPPORTS_UNWIND_TEST)
+// We test unwind metadata by running the function under test with the trap flag
+// set. This results in |SIGTRAP| and |EXCEPTION_SINGLE_STEP| on Linux and
+// Windows, respectively. We handle these and verify libunwind or the Windows
+// unwind APIs unwind successfully.
 
+// IsAncestorStackFrame returns true if |a_sp| is an ancestor stack frame of
+// |b_sp|.
+static bool IsAncestorStackFrame(crypto_word_t a_sp, crypto_word_t b_sp) {
+#if defined(OPENSSL_X86_64)
+  // The stack grows down, so ancestor stack frames have higher addresses.
+  return a_sp > b_sp;
+#else
+#error "unknown architecture"
+#endif
+}
+
+// Implement some string formatting utilities. Ideally we would use |snprintf|,
+// but this is called in a signal handler and |snprintf| is not async-signal-
+// safe.
+
+#if !defined(OPENSSL_WINDOWS)
+static std::array<char, DECIMAL_SIZE(crypto_word_t) + 1> WordToDecimal(
+    crypto_word_t v) {
+  std::array<char, DECIMAL_SIZE(crypto_word_t) + 1> ret;
+  size_t len = 0;
+  do {
+    ret[len++] = '0' + v % 10;
+    v /= 10;
+  } while (v != 0);
+  for (size_t i = 0; i < len / 2; i++) {
+    std::swap(ret[i], ret[len - 1 - i]);
+  }
+  ret[len] = '\0';
+  return ret;
+}
+#endif  // !OPENSSL_WINDOWS
+
+static std::array<char, sizeof(crypto_word_t) * 2 + 1> WordToHex(
+    crypto_word_t v) {
+  static const char kHex[] = "0123456789abcdef";
+  std::array<char, sizeof(crypto_word_t) * 2 + 1> ret;
+  for (size_t i = sizeof(crypto_word_t) - 1; i < sizeof(crypto_word_t); i--) {
+    uint8_t b = v & 0xff;
+    v >>= 8;
+    ret[i * 2] = kHex[b >> 4];
+    ret[i * 2 + 1] = kHex[b & 0xf];
+  }
+  ret[sizeof(crypto_word_t) * 2] = '\0';
+  return ret;
+}
+
+static void StrCatSignalSafeImpl(bssl::Span<char> out) {}
+
+template <typename... Args>
+static void StrCatSignalSafeImpl(bssl::Span<char> out, const char *str,
+                                 Args... args) {
+  BUF_strlcat(out.data(), str, out.size());
+  StrCatSignalSafeImpl(out, args...);
+}
+
+template <typename... Args>
+static void StrCatSignalSafe(bssl::Span<char> out, Args... args) {
+  if (out.empty()) {
+    return;
+  }
+  out[0] = '\0';
+  StrCatSignalSafeImpl(out, args...);
+}
+
+template <typename... Args>
+[[noreturn]] static void FatalError(Args... args) {
+  // We cannot use |snprintf| here because it is not async-signal-safe.
+  char buf[512];
+  StrCatSignalSafe(buf, args..., "\n");
+#if defined(OPENSSL_WINDOWS)
+  HANDLE stderr_handle = GetStdHandle(STD_ERROR_HANDLE);
+  if (stderr_handle != INVALID_HANDLE_VALUE) {
+    DWORD unused;
+    WriteFile(stderr_handle, buf, strlen(buf), &unused, nullptr);
+  }
+#else
+  write(STDERR_FILENO, buf, strlen(buf));
+#endif
+  abort();
+}
+
+class UnwindStatus {
+ public:
+  UnwindStatus() : err_(nullptr) {}
+  explicit UnwindStatus(const char *err) : err_(err) {}
+
+  bool ok() const { return err_ == nullptr; }
+  const char *Error() const { return err_; }
+
+ private:
+  const char *err_;
+};
+
+template<typename T>
+class UnwindStatusOr {
+ public:
+  UnwindStatusOr(UnwindStatus status) : status_(status) {
+    assert(!status_.ok());
+  }
+
+  UnwindStatusOr(const T &value) : status_(UnwindStatus()), value_(value) {}
+
+  bool ok() const { return status_.ok(); }
+  const char *Error() const { return status_.Error(); }
+
+  const T &ValueOrDie(const char *msg = "Unexpected error") const {
+    if (!ok()) {
+      FatalError(msg, ": ", Error());
+    }
+    return value_;
+  }
+
+ private:
+  UnwindStatus status_;
+  T value_;
+};
+
+// UnwindCursor abstracts between libunwind and Windows unwind APIs. It is
+// async-signal-safe.
+#if defined(OPENSSL_WINDOWS)
+class UnwindCursor {
+ public:
+  explicit UnwindCursor(const CONTEXT &ctx) : ctx_(ctx) {
+    starting_ip_ = ctx_.Rip;
+  }
+
+  // Step unwinds the cursor by one frame. On success, it returns whether there
+  // were more frames to unwind.
+  UnwindStatusOr<bool> Step() {
+    bool is_top = is_top_;
+    is_top_ = false;
+
+    DWORD64 image_base;
+    RUNTIME_FUNCTION *entry =
+        RtlLookupFunctionEntry(ctx_.Rip, &image_base, nullptr);
+    if (entry == nullptr) {
+      // This is a leaf function. Leaf functions do not touch stack or
+      // callee-saved registers, so they may be unwound by simulating a ret.
+      if (!is_top) {
+        return UnwindStatus("leaf function found below the top frame");
+      }
+      memcpy(&ctx_.Rip, reinterpret_cast<const void *>(ctx_.Rsp),
+             sizeof(ctx_.Rip));
+      ctx_.Rsp += 8;
+      return true;
+    }
+
+    // This is a frame function. Call into the Windows unwinder.
+    void *handler_data;
+    DWORD64 establisher_frame;
+    RtlVirtualUnwind(UNW_FLAG_NHANDLER, image_base, ctx_.Rip, entry, &ctx_,
+                     &handler_data, &establisher_frame, nullptr);
+    return ctx_.Rip != 0;
+  }
+
+  // GetIP returns the instruction pointer at the current frame.
+  UnwindStatusOr<crypto_word_t> GetIP() { return ctx_.Rip; }
+
+  // GetSP returns the stack pointer at the current frame.
+  UnwindStatusOr<crypto_word_t> GetSP() { return ctx_.Rsp; }
+
+  // GetCallerState returns the callee-saved registers at the current frame.
+  UnwindStatusOr<CallerState> GetCallerState() {
+    CallerState state;
+    state.rbx = ctx_.Rbx;
+    state.rbp = ctx_.Rbp;
+    state.rdi = ctx_.Rdi;
+    state.rsi = ctx_.Rsi;
+    state.r12 = ctx_.R12;
+    state.r13 = ctx_.R13;
+    state.r14 = ctx_.R14;
+    state.r15 = ctx_.R15;
+    memcpy(&state.xmm6, &ctx_.Xmm6, sizeof(Reg128));
+    memcpy(&state.xmm7, &ctx_.Xmm7, sizeof(Reg128));
+    memcpy(&state.xmm8, &ctx_.Xmm8, sizeof(Reg128));
+    memcpy(&state.xmm9, &ctx_.Xmm9, sizeof(Reg128));
+    memcpy(&state.xmm10, &ctx_.Xmm10, sizeof(Reg128));
+    memcpy(&state.xmm11, &ctx_.Xmm11, sizeof(Reg128));
+    memcpy(&state.xmm12, &ctx_.Xmm12, sizeof(Reg128));
+    memcpy(&state.xmm13, &ctx_.Xmm13, sizeof(Reg128));
+    memcpy(&state.xmm14, &ctx_.Xmm14, sizeof(Reg128));
+    memcpy(&state.xmm15, &ctx_.Xmm15, sizeof(Reg128));
+    return state;
+  }
+
+  // ToString returns a human-readable representation of the address the cursor
+  // started at, using debug information if available.
+  const char *ToString() {
+    // TODO(davidben): Use SymFromAddr here. See base/debug/stack_trace_win.cc
+    // in Chromium for an example. It probably should be called outside the
+    // exception handler, which means we need to stash the address in
+    // |g_unwind_errors| to defer it.
+    StrCatSignalSafe(starting_ip_buf_, "0x", WordToHex(starting_ip_).data());
+    return starting_ip_buf_;
+  }
+
+ private:
+  CONTEXT ctx_;
+  crypto_word_t starting_ip_;
+  char starting_ip_buf_[64];
+  bool is_top_ = true;
+};
+#else  // !OPENSSL_WINDOWS
+class UnwindCursor {
+ public:
+  explicit UnwindCursor(unw_context_t *ctx) : ctx_(ctx) {
+    int ret = InitAtSignalFrame(&cursor_);
+    if (ret < 0) {
+      FatalError("Error getting unwind context: ", unw_strerror(ret));
+    }
+    starting_ip_ = GetIP().ValueOrDie("Error getting instruction pointer");
+  }
+
+  // Step unwinds the cursor by one frame. On success, it returns whether there
+  // were more frames to unwind.
+  UnwindStatusOr<bool> Step() {
+    int ret = unw_step(&cursor_);
+    if (ret < 0) {
+      return UNWError(ret);
+    }
+    return ret != 0;
+  }
+
+  // GetIP returns the instruction pointer at the current frame.
+  UnwindStatusOr<crypto_word_t> GetIP() {
+    crypto_word_t ip;
+    int ret = GetReg(&ip, UNW_REG_IP);
+    if (ret < 0) {
+      return UNWError(ret);
+    }
+    return ip;
+  }
+
+  // GetSP returns the stack pointer at the current frame.
+  UnwindStatusOr<crypto_word_t> GetSP() {
+    crypto_word_t sp;
+    int ret = GetReg(&sp, UNW_REG_SP);
+    if (ret < 0) {
+      return UNWError(ret);
+    }
+    return sp;
+  }
+
+  // GetCallerState returns the callee-saved registers at the current frame.
+  UnwindStatusOr<CallerState> GetCallerState() {
+    CallerState state;
+    int ret = 0;
+#if defined(OPENSSL_X86_64)
+    ret = ret < 0 ? ret : GetReg(&state.rbx, UNW_X86_64_RBX);
+    ret = ret < 0 ? ret : GetReg(&state.rbp, UNW_X86_64_RBP);
+    ret = ret < 0 ? ret : GetReg(&state.r12, UNW_X86_64_R12);
+    ret = ret < 0 ? ret : GetReg(&state.r13, UNW_X86_64_R13);
+    ret = ret < 0 ? ret : GetReg(&state.r14, UNW_X86_64_R14);
+    ret = ret < 0 ? ret : GetReg(&state.r15, UNW_X86_64_R15);
+#else
+#error "unknown architecture"
+#endif
+    if (ret < 0) {
+      return UNWError(ret);
+    }
+    return state;
+  }
+
+  // ToString returns a human-readable representation of the address the cursor
+  // started at, using debug information if available.
+  const char *ToString() {
+    // Use a new cursor. |cursor_| has already been unwound, and
+    // |unw_get_proc_name| is slow so we do not sample it unconditionally in the
+    // constructor.
+    unw_cursor_t cursor;
+    unw_word_t off;
+    if (InitAtSignalFrame(&cursor) != 0 ||
+        unw_get_proc_name(&cursor, starting_ip_buf_, sizeof(starting_ip_buf_),
+                          &off) != 0) {
+      StrCatSignalSafe(starting_ip_buf_, "0x", WordToHex(starting_ip_).data());
+      return starting_ip_buf_;
+    }
+    size_t len = strlen(starting_ip_buf_);
+    // Print the offset in decimal, to match gdb's disassembly output and ease
+    // debugging.
+    StrCatSignalSafe(bssl::Span<char>(starting_ip_buf_).subspan(len), "+",
+                     WordToDecimal(off).data(), " (0x",
+                     WordToHex(starting_ip_).data(), ")");
+    return starting_ip_buf_;
+  }
+
+ private:
+  static UnwindStatus UNWError(int ret) {
+    assert(ret < 0);
+    const char *msg = unw_strerror(ret);
+    return UnwindStatus(msg == nullptr ? "unknown error" : msg);
+  }
+
+  int InitAtSignalFrame(unw_cursor_t *cursor) {
+    // Work around a bug in libunwind which breaks rax and rdx recovery. This
+    // breaks functions which temporarily use rax as the CFA register. See
+    // https://git.savannah.gnu.org/gitweb/?p=libunwind.git;a=commit;h=819bf51bbd2da462c2ec3401e8ac9153b6e725e3
+    OPENSSL_memset(cursor, 0, sizeof(*cursor));
+    int ret = unw_init_local(cursor, ctx_);
+    if (ret < 0) {
+      return ret;
+    }
+    for (;;) {
+      ret = unw_is_signal_frame(cursor);
+      if (ret < 0) {
+        return ret;
+      }
+      if (ret != 0) {
+        return 0;  // Found the signal frame.
+      }
+      ret = unw_step(cursor);
+      if (ret < 0) {
+        return ret;
+      }
+    }
+  }
+
+  int GetReg(crypto_word_t *out, unw_regnum_t reg) {
+    unw_word_t val;
+    int ret = unw_get_reg(&cursor_, reg, &val);
+    if (ret == 0) {
+      static_assert(sizeof(crypto_word_t) == sizeof(unw_word_t),
+                    "crypto_word_t and unw_word_t are inconsistent");
+      *out = val;
+    }
+    return ret;
+  }
+
+  unw_context_t *ctx_;
+  unw_cursor_t cursor_;
+  crypto_word_t starting_ip_;
+  char starting_ip_buf_[64];
+};
+#endif  // OPENSSL_WINDOWS
+
+// g_in_trampoline is true if we are in an instrumented |abi_test_trampoline|
+// call, in the region that triggers |SIGTRAP| or |EXCEPTION_SINGLE_STEP|.
+static bool g_in_trampoline = false;
+// g_unwind_function_done, if |g_in_trampoline| is true, is whether the function
+// under test has returned. It is undefined otherwise.
+static bool g_unwind_function_done;
+// g_trampoline_state, if |g_in_trampoline| is true, is the state the function
+// under test must preserve. It is undefined otherwise.
+static CallerState g_trampoline_state;
+// g_trampoline_sp, if |g_in_trampoline| is true, is the stack pointer of the
+// trampoline frame. It is undefined otherwise.
+static crypto_word_t g_trampoline_sp;
+
+// kMaxUnwindErrors is the maximum number of unwind errors reported per
+// function. If a function's unwind tables are wrong, we are otherwise likely to
+// repeat the same error at multiple addresses.
+static constexpr size_t kMaxUnwindErrors = 10;
+
+// Errors are saved in a signal handler. We use a static buffer to avoid
+// allocation.
+static size_t g_num_unwind_errors = 0;
+static char g_unwind_errors[kMaxUnwindErrors][512];
+
+template <typename... Args>
+static void AddUnwindError(Args... args) {
+  if (g_num_unwind_errors >= kMaxUnwindErrors) {
+    return;
+  }
+  StrCatSignalSafe(g_unwind_errors[g_num_unwind_errors], args...);
+  g_num_unwind_errors++;
+}
+
+static void CheckUnwind(UnwindCursor *cursor) {
+  const crypto_word_t kStartAddress =
+      reinterpret_cast<crypto_word_t>(&abi_test_unwind_start);
+  const crypto_word_t kReturnAddress =
+      reinterpret_cast<crypto_word_t>(&abi_test_unwind_return);
+  const crypto_word_t kStopAddress =
+      reinterpret_cast<crypto_word_t>(&abi_test_unwind_stop);
+
+  crypto_word_t sp = cursor->GetSP().ValueOrDie("Error getting stack pointer");
+  crypto_word_t ip =
+      cursor->GetIP().ValueOrDie("Error getting instruction pointer");
+  if (!g_in_trampoline) {
+    if (ip != kStartAddress) {
+      FatalError("Unexpected SIGTRAP at ", cursor->ToString());
+    }
+
+    // Save the current state and begin.
+    g_in_trampoline = true;
+    g_unwind_function_done = false;
+    g_trampoline_sp = sp;
+    g_trampoline_state = cursor->GetCallerState().ValueOrDie(
+        "Error getting initial caller state");
+  } else {
+    if (sp == g_trampoline_sp || g_unwind_function_done) {
+      // |g_unwind_function_done| should imply |sp| is |g_trampoline_sp|, but
+      // clearing the trap flag in x86 briefly displaces the stack pointer.
+      //
+      // Also note we check both |ip| and |sp| below, in case the function under
+      // test is also |abi_test_trampoline|.
+      if (ip == kReturnAddress && sp == g_trampoline_sp) {
+        g_unwind_function_done = true;
+      }
+      if (ip == kStopAddress && sp == g_trampoline_sp) {
+        // |SIGTRAP| is fatal again.
+        g_in_trampoline = false;
+      }
+    } else if (IsAncestorStackFrame(sp, g_trampoline_sp)) {
+      // This should never happen. We went past |g_trampoline_sp| without
+      // stopping at |kStopAddress|.
+      AddUnwindError("stack frame is before caller at ",
+                     cursor->ToString());
+      g_in_trampoline = false;
+    } else if (g_num_unwind_errors < kMaxUnwindErrors) {
+      for (;;) {
+        UnwindStatusOr<bool> step_ret = cursor->Step();
+        if (!step_ret.ok()) {
+          AddUnwindError("error unwinding from ", cursor->ToString(), ": ",
+                         step_ret.Error());
+          break;
+        }
+        // |Step| returns whether there was a frame to unwind.
+        if (!step_ret.ValueOrDie()) {
+          AddUnwindError("could not unwind to starting frame from ",
+                         cursor->ToString());
+          break;
+        }
+
+        UnwindStatusOr<crypto_word_t> cur_sp = cursor->GetSP();
+        if (!cur_sp.ok()) {
+          AddUnwindError("error recovering stack pointer unwinding from ",
+                         cursor->ToString(), ": ", cur_sp.Error());
+          break;
+        }
+        if (IsAncestorStackFrame(cur_sp.ValueOrDie(), g_trampoline_sp)) {
+          AddUnwindError("unwound past starting frame from ",
+                         cursor->ToString());
+          break;
+        }
+        if (cur_sp.ValueOrDie() == g_trampoline_sp) {
+          // We found the parent frame. Check the return address.
+          UnwindStatusOr<crypto_word_t> cur_ip = cursor->GetIP();
+          if (!cur_ip.ok()) {
+            AddUnwindError("error recovering return address unwinding from ",
+                           cursor->ToString(), ": ", cur_ip.Error());
+          } else if (cur_ip.ValueOrDie() != kReturnAddress) {
+            AddUnwindError("wrong return address unwinding from ",
+                           cursor->ToString());
+          }
+
+          // Check the remaining registers.
+          UnwindStatusOr<CallerState> state = cursor->GetCallerState();
+          if (!state.ok()) {
+            AddUnwindError("error recovering registers unwinding from ",
+                           cursor->ToString(), ": ", state.Error());
+          } else {
+            ForEachMismatch(
+                state.ValueOrDie(), g_trampoline_state, [&](const char *reg) {
+                  AddUnwindError(reg, " was not recovered unwinding from ",
+                                 cursor->ToString());
+                });
+          }
+          break;
+        }
+      }
+    }
+  }
+}
+
+static void ReadUnwindResult(Result *out) {
+  for (size_t i = 0; i < g_num_unwind_errors; i++) {
+    out->errors.emplace_back(g_unwind_errors[i]);
+  }
+  if (g_num_unwind_errors == kMaxUnwindErrors) {
+    out->errors.emplace_back("(additional errors omitted)");
+  }
+  g_num_unwind_errors = 0;
+}
+
+#if defined(OPENSSL_WINDOWS)
+static DWORD g_main_thread;
+
+static long ExceptionHandler(EXCEPTION_POINTERS *info) {
+  if (info->ExceptionRecord->ExceptionCode != EXCEPTION_SINGLE_STEP ||
+      GetCurrentThreadId() != g_main_thread) {
+    return EXCEPTION_CONTINUE_SEARCH;
+  }
+
+  UnwindCursor cursor(*info->ContextRecord);
+  CheckUnwind(&cursor);
+  if (g_in_trampoline) {
+    // Windows clears the trap flag, so we must restore it.
+    info->ContextRecord->EFlags |= 0x100;
+  }
+  return EXCEPTION_CONTINUE_EXECUTION;
+}
+
+static void EnableUnwindTestsImpl() {
+  if (IsDebuggerPresent()) {
+    // Unwind tests drive logic via |EXCEPTION_SINGLE_STEP|, which conflicts with
+    // debuggers.
+    fprintf(stderr, "Debugger detected. Disabling unwind tests.\n");
+    return;
+  }
+
+  g_main_thread = GetCurrentThreadId();
+
+  if (AddVectoredExceptionHandler(0, ExceptionHandler) == nullptr) {
+    fprintf(stderr, "Error installing exception handler.\n");
+    abort();
+  }
+
+  g_unwind_tests_enabled = true;
+}
+#else  // !OPENSSL_WINDOWS
 // HandleEINTR runs |func| and returns the result, retrying the operation on
 // |EINTR|.
 template <typename Func>
@@ -170,171 +687,8 @@
   return idx < status.size() && status[idx] != '0';
 }
 
-// IsAncestorStackFrame returns true if |a_sp| is an ancestor stack frame of
-// |b_sp|.
-static bool IsAncestorStackFrame(unw_word_t a_sp, unw_word_t b_sp) {
-#if defined(OPENSSL_X86_64)
-  // The stack grows down, so ancestor stack frames have higher addresses.
-  return a_sp > b_sp;
-#else
-#error "unknown architecture"
-#endif
-}
-
-static int CallerStateFromUNWCursor(CallerState *out, unw_cursor_t *cursor) {
-  // |CallerState| uses |crypto_word_t|, while libunwind uses |unw_word_t|, but
-  // both are defined as |uint*_t| from stdint.h, so we can assume the types
-  // match.
-#if defined(OPENSSL_X86_64)
-  int ret = 0;
-  ret = ret < 0 ? ret : unw_get_reg(cursor, UNW_X86_64_RBX, &out->rbx);
-  ret = ret < 0 ? ret : unw_get_reg(cursor, UNW_X86_64_RBP, &out->rbp);
-  ret = ret < 0 ? ret : unw_get_reg(cursor, UNW_X86_64_R12, &out->r12);
-  ret = ret < 0 ? ret : unw_get_reg(cursor, UNW_X86_64_R13, &out->r13);
-  ret = ret < 0 ? ret : unw_get_reg(cursor, UNW_X86_64_R14, &out->r14);
-  ret = ret < 0 ? ret : unw_get_reg(cursor, UNW_X86_64_R15, &out->r15);
-  return ret;
-#else
-#error "unknown architecture"
-#endif
-}
-
-// Implement some string formatting utilties. Ideally we would use |snprintf|,
-// but this is called in a signal handler and |snprintf| is not async-signal-
-// safe.
-
-static std::array<char, DECIMAL_SIZE(unw_word_t) + 1> WordToDecimal(
-    unw_word_t v) {
-  std::array<char, DECIMAL_SIZE(unw_word_t) + 1> ret;
-  size_t len = 0;
-  do {
-    ret[len++] = '0' + v % 10;
-    v /= 10;
-  } while (v != 0);
-  for (size_t i = 0; i < len / 2; i++) {
-    std::swap(ret[i], ret[len - 1 - i]);
-  }
-  ret[len] = '\0';
-  return ret;
-}
-
-static std::array<char, sizeof(unw_word_t) * 2 + 1> WordToHex(unw_word_t v) {
-  static const char kHex[] = "0123456789abcdef";
-  std::array<char, sizeof(unw_word_t) * 2 + 1> ret;
-  for (size_t i = sizeof(unw_word_t) - 1; i < sizeof(unw_word_t); i--) {
-    uint8_t b = v & 0xff;
-    v >>= 8;
-    ret[i * 2] = kHex[b >> 4];
-    ret[i * 2 + 1] = kHex[b & 0xf];
-  }
-  ret[sizeof(unw_word_t) * 2] = '\0';
-  return ret;
-}
-
-static void StrCatSignalSafeImpl(bssl::Span<char> out) {}
-
-template <typename... Args>
-static void StrCatSignalSafeImpl(bssl::Span<char> out, const char *str,
-                                 Args... args) {
-  BUF_strlcat(out.data(), str, out.size());
-  StrCatSignalSafeImpl(out, args...);
-}
-
-template <typename... Args>
-static void StrCatSignalSafe(bssl::Span<char> out, Args... args) {
-  if (out.empty()) {
-    return;
-  }
-  out[0] = '\0';
-  StrCatSignalSafeImpl(out, args...);
-}
-
-static int UnwindToSignalFrame(unw_cursor_t *cursor) {
-  for (;;) {
-    int ret = unw_is_signal_frame(cursor);
-    if (ret < 0) {
-      return ret;
-    }
-    if (ret != 0) {
-      return 0;  // Found the signal frame.
-    }
-    ret = unw_step(cursor);
-    if (ret < 0) {
-      return ret;
-    }
-  }
-}
-
-// IPToString returns a human-readable representation of |ip|, using debug
-// information from |ctx| if available. |ip| must be the address of |ctx|'s
-// signal frame. This function is async-signal-safe.
-static std::array<char, 256> IPToString(unw_word_t ip, unw_context_t *ctx) {
-  std::array<char, 256> ret;
-  // Use a new cursor. The caller's cursor has already been unwound, but
-  // |unw_get_proc_name| is slow so we do not wish to call it all the time.
-  unw_cursor_t cursor;
-  // Work around a bug in libunwind. See
-  // https://git.savannah.gnu.org/gitweb/?p=libunwind.git;a=commit;h=819bf51bbd2da462c2ec3401e8ac9153b6e725e3
-  OPENSSL_memset(&cursor, 0, sizeof(cursor));
-  unw_word_t off;
-  if (unw_init_local(&cursor, ctx) != 0 ||
-      UnwindToSignalFrame(&cursor) != 0 ||
-      unw_get_proc_name(&cursor, ret.data(), ret.size(), &off) != 0) {
-    StrCatSignalSafe(bssl::MakeSpan(ret), "0x", WordToHex(ip).data());
-    return ret;
-  }
-  size_t len = strlen(ret.data());
-  // Print the offset in decimal, to match gdb's disassembly output and ease
-  // debugging.
-  StrCatSignalSafe(bssl::MakeSpan(ret).subspan(len), "+",
-                   WordToDecimal(off).data(), " (0x", WordToHex(ip).data(),
-                   ")");
-  return ret;
-}
-
 static pthread_t g_main_thread;
 
-// g_in_trampoline is true if we are in an instrumented |abi_test_trampoline|
-// call, in the region that triggers |SIGTRAP|.
-static bool g_in_trampoline = false;
-// g_unwind_function_done, if |g_in_trampoline| is true, is whether the function
-// under test has returned. It is undefined otherwise.
-static bool g_unwind_function_done;
-// g_trampoline_state, if |g_in_trampoline| is true, is the state the function
-// under test must preserve. It is undefined otherwise.
-static CallerState g_trampoline_state;
-// g_trampoline_sp, if |g_in_trampoline| is true, is the stack pointer of the
-// trampoline frame. It is undefined otherwise.
-static unw_word_t g_trampoline_sp;
-
-// kMaxUnwindErrors is the maximum number of unwind errors reported per
-// function. If a function's unwind tables are wrong, we are otherwise likely to
-// repeat the same error at multiple addresses.
-static constexpr size_t kMaxUnwindErrors = 10;
-
-// Errors are saved in a signal handler. We use a static buffer to avoid
-// allocation.
-static size_t num_unwind_errors = 0;
-static char unwind_errors[kMaxUnwindErrors][512];
-
-template <typename... Args>
-static void AddUnwindError(Args... args) {
-  if (num_unwind_errors >= kMaxUnwindErrors) {
-    return;
-  }
-  StrCatSignalSafe(unwind_errors[num_unwind_errors], args...);
-  num_unwind_errors++;
-}
-
-template <typename... Args>
-[[noreturn]] static void FatalError(Args... args) {
-  // We cannot use |snprintf| here because it is not async-signal-safe.
-  char buf[512];
-  StrCatSignalSafe(buf, args..., "\n");
-  write(STDERR_FILENO, buf, strlen(buf));
-  abort();
-}
-
 static void TrapHandler(int sig) {
   // Note this is a signal handler, so only async-signal-safe functions may be
   // used here. See signal-safety(7). libunwind promises local unwind is
@@ -348,126 +702,12 @@
 
   unw_context_t ctx;
   int ret = unw_getcontext(&ctx);
-  unw_cursor_t cursor;
-  // Work around a bug in libunwind which breaks rax and rdx recovery. This
-  // breaks functions which temporarily use rax as the CFA register. See
-  // https://git.savannah.gnu.org/gitweb/?p=libunwind.git;a=commit;h=819bf51bbd2da462c2ec3401e8ac9153b6e725e3
-  OPENSSL_memset(&cursor, 0, sizeof(cursor));
-  ret = ret < 0 ? ret : unw_init_local(&cursor, &ctx);
-  ret = ret < 0 ? ret : UnwindToSignalFrame(&cursor);
-  unw_word_t sp, ip;
-  ret = ret < 0 ? ret : unw_get_reg(&cursor, UNW_REG_SP, &sp);
-  ret = ret < 0 ? ret : unw_get_reg(&cursor, UNW_REG_IP, &ip);
   if (ret < 0) {
-    FatalError("Error initializing unwind cursor: ", unw_strerror(ret));
+    FatalError("Error getting unwind context: ", unw_strerror(ret));
   }
 
-  const unw_word_t kStartAddress =
-      reinterpret_cast<unw_word_t>(&abi_test_unwind_start);
-  const unw_word_t kReturnAddress =
-      reinterpret_cast<unw_word_t>(&abi_test_unwind_return);
-  const unw_word_t kStopAddress =
-      reinterpret_cast<unw_word_t>(&abi_test_unwind_stop);
-  if (!g_in_trampoline) {
-    if (ip != kStartAddress) {
-      FatalError("Unexpected SIGTRAP at ", IPToString(ip, &ctx).data());
-    }
-
-    // Save the current state and begin.
-    g_in_trampoline = true;
-    g_unwind_function_done = false;
-    g_trampoline_sp = sp;
-    ret = CallerStateFromUNWCursor(&g_trampoline_state, &cursor);
-    if (ret < 0) {
-      FatalError("Error getting initial caller state: ", unw_strerror(ret));
-    }
-  } else {
-    if (sp == g_trampoline_sp || g_unwind_function_done) {
-      // |g_unwind_function_done| should imply |sp| is |g_trampoline_sp|, but
-      // clearing the trap flag in x86 briefly displaces the stack pointer.
-      //
-      // Also note we check both |ip| and |sp| below, in case the function under
-      // test is also |abi_test_trampoline|.
-      if (ip == kReturnAddress && sp == g_trampoline_sp) {
-        g_unwind_function_done = true;
-      }
-      if (ip == kStopAddress && sp == g_trampoline_sp) {
-        // |SIGTRAP| is fatal again.
-        g_in_trampoline = false;
-      }
-    } else if (IsAncestorStackFrame(sp, g_trampoline_sp)) {
-      // This should never happen. We went past |g_trampoline_sp| without
-      // stopping at |kStopAddress|.
-      AddUnwindError("stack frame is before caller at ",
-                     IPToString(ip, &ctx).data());
-      g_in_trampoline = false;
-    } else if (num_unwind_errors < kMaxUnwindErrors) {
-      for (;;) {
-        ret = unw_step(&cursor);
-        if (ret < 0) {
-          AddUnwindError("error unwinding from ", IPToString(ip, &ctx).data(),
-                         ": ", unw_strerror(ret));
-          break;
-        }
-        if (ret == 0) {
-          AddUnwindError("could not unwind to starting frame from ",
-                         IPToString(ip, &ctx).data());
-          break;
-        }
-
-        unw_word_t cur_sp;
-        ret = unw_get_reg(&cursor, UNW_REG_SP, &cur_sp);
-        if (ret < 0) {
-          AddUnwindError("error recovering stack pointer unwinding from ",
-                         IPToString(ip, &ctx).data(), ": ", unw_strerror(ret));
-          break;
-        }
-        if (IsAncestorStackFrame(cur_sp, g_trampoline_sp)) {
-          AddUnwindError("unwound past starting frame from ",
-                         IPToString(ip, &ctx).data());
-          break;
-        }
-        if (cur_sp == g_trampoline_sp) {
-          // We found the parent frame. Check the return address.
-          unw_word_t cur_ip;
-          ret = unw_get_reg(&cursor, UNW_REG_IP, &cur_ip);
-          if (ret < 0) {
-            AddUnwindError("error recovering return address unwinding from ",
-                           IPToString(ip, &ctx).data(), ": ",
-                           unw_strerror(ret));
-          } else if (cur_ip != kReturnAddress) {
-            AddUnwindError("wrong return address unwinding from ",
-                           IPToString(ip, &ctx).data());
-          }
-
-          // Check the remaining registers.
-          CallerState state;
-          ret = CallerStateFromUNWCursor(&state, &cursor);
-          if (ret < 0) {
-            AddUnwindError("error recovering registers unwinding from ",
-                           IPToString(ip, &ctx).data(), ": ",
-                           unw_strerror(ret));
-          } else {
-            ForEachMismatch(state, g_trampoline_state, [&](const char *reg) {
-              AddUnwindError(reg, " was not recovered unwinding from ",
-                             IPToString(ip, &ctx).data());
-            });
-          }
-          break;
-        }
-      }
-    }
-  }
-}
-
-static void ReadUnwindResult(Result *out) {
-  for (size_t i = 0; i < num_unwind_errors; i++) {
-    out->errors.emplace_back(unwind_errors[i]);
-  }
-  if (num_unwind_errors == kMaxUnwindErrors) {
-    out->errors.emplace_back("(additional errors omitted)");
-  }
-  num_unwind_errors = 0;
+  UnwindCursor cursor(&ctx);
+  CheckUnwind(&cursor);
 }
 
 static void EnableUnwindTestsImpl() {
@@ -490,14 +730,16 @@
 
   g_unwind_tests_enabled = true;
 }
+#endif  // OPENSSL_WINDOWS
 
-#else
-// TODO(davidben): Implement an SEH-based unwind-tester.
+#else  // !SUPPORTS_UNWIND_TEST
+
 #if defined(SUPPORTS_ABI_TEST)
 static void ReadUnwindResult(Result *) {}
 #endif
 static void EnableUnwindTestsImpl() {}
-#endif  // UNWIND_TEST_SIGTRAP
+
+#endif  // SUPPORTS_UNWIND_TEST
 
 }  // namespace internal
 
diff --git a/crypto/test/abi_test.h b/crypto/test/abi_test.h
index 91b2d42..bf25552 100644
--- a/crypto/test/abi_test.h
+++ b/crypto/test/abi_test.h
@@ -285,8 +285,20 @@
 //
 // Functional testing requires coverage of input values, while ABI testing only
 // requires branch coverage. Most of our assembly is constant-time, so usually
-// only a few instrumented calls are necessray.
-#define CHECK_ABI(...)                                                   \
+// only a few instrumented calls are necessary.
+//
+// TODO(https://crbug.com/boringssl/259): Most of our Windows assembly
+// currently fails SEH testing. For now, |CHECK_ABI| behaves like
+// |CHECK_ABI_NO_UNWIND| on Windows. Functions which work with unwind testing
+// on Windows should use |CHECK_ABI_SEH|.
+#if defined(OPENSSL_WINDOWS)
+#define CHECK_ABI(...) CHECK_ABI_NO_UNWIND(__VA_ARGS__)
+#else
+#define CHECK_ABI(...) CHECK_ABI_SEH(__VA_ARGS__)
+#endif
+
+// CHECK_ABI_SEH behaves like |CHECK_ABI| but enables unwind testing on Windows.
+#define CHECK_ABI_SEH(...)                                               \
   abi_test::internal::CheckGTest(#__VA_ARGS__, __FILE__, __LINE__, true, \
                                  __VA_ARGS__)
 
@@ -339,12 +351,19 @@
 void abi_test_unwind_stop(Uncallable);
 
 // abi_test_bad_unwind_wrong_register preserves the ABI, but annotates the wrong
-// register in CFI metadata.
+// register in unwind metadata.
 void abi_test_bad_unwind_wrong_register(void);
 
 // abi_test_bad_unwind_temporary preserves the ABI, but temporarily corrupts the
 // storage space for a saved register, breaking unwind.
 void abi_test_bad_unwind_temporary(void);
+
+#if defined(OPENSSL_WINDOWS)
+// abi_test_bad_unwind_epilog preserves the ABI, and correctly annotates the
+// prolog, but the epilog does not match Win64's rules, breaking unwind during
+// the epilog.
+void abi_test_bad_unwind_epilog(void);
+#endif
 #endif  // OPENSSL_X86_64
 
 #if defined(OPENSSL_X86_64) || defined(OPENSSL_X86)
diff --git a/crypto/test/asm/trampoline-x86_64.pl b/crypto/test/asm/trampoline-x86_64.pl
index 57d70b2..75aae45 100755
--- a/crypto/test/asm/trampoline-x86_64.pl
+++ b/crypto/test/asm/trampoline-x86_64.pl
@@ -139,7 +139,7 @@
 .globl	abi_test_trampoline
 .align	16
 abi_test_trampoline:
-.Labi_test_trampoline_begin:
+.Labi_test_trampoline_seh_begin:
 .cfi_startproc
 	# Stack layout:
 	#   8 bytes - align
@@ -178,7 +178,7 @@
 $code .= <<____;
 	subq	\$$stack_alloc_size, %rsp
 .cfi_adjust_cfa_offset	$stack_alloc_size
-.Labi_test_trampoline_prolog_alloc:
+.Labi_test_trampoline_seh_prolog_alloc:
 ____
 $code .= <<____ if (!$win64);
 	movq	$unwind, $unwind_offset(%rsp)
@@ -194,11 +194,11 @@
   $off -= $stack_alloc_size + 8;
   return <<____;
 .cfi_offset	$reg, $off
-.Labi_test_trampoline_prolog_$reg:
+.Labi_test_trampoline_seh_prolog_$reg:
 ____
 });
 $code .= <<____;
-.Labi_test_trampoline_prolog_end:
+.Labi_test_trampoline_seh_prolog_end:
 ____
 
 $code .= load_caller_state(0, $state);
@@ -295,7 +295,7 @@
 	# %rax already contains \$func's return value, unmodified.
 	ret
 .cfi_endproc
-.Labi_test_trampoline_end:
+.Labi_test_trampoline_seh_end:
 .size	abi_test_trampoline,.-abi_test_trampoline
 ____
 
@@ -327,18 +327,25 @@
 
 $code .= <<____;
 # abi_test_bad_unwind_wrong_register preserves the ABI, but annotates the wrong
-# register in CFI metadata.
+# register in unwind metadata.
 # void abi_test_bad_unwind_wrong_register(void);
 .type	abi_test_bad_unwind_wrong_register, \@abi-omnipotent
 .globl	abi_test_bad_unwind_wrong_register
 .align	16
 abi_test_bad_unwind_wrong_register:
 .cfi_startproc
+.Labi_test_bad_unwind_wrong_register_seh_begin:
 	pushq	%r12
 .cfi_push	%r13	# This should be %r12
+.Labi_test_bad_unwind_wrong_register_seh_push_r13:
+	# Windows evaluates epilogs directly in the unwinder, rather than using
+	# unwind codes. Add a nop so there is one non-epilog point (immediately
+	# before the nop) where the unwinder can observe the mistake.
+	nop
 	popq	%r12
 .cfi_pop	%r12
 	ret
+.Labi_test_bad_unwind_wrong_register_seh_end:
 .cfi_endproc
 .size	abi_test_bad_unwind_wrong_register,.-abi_test_bad_unwind_wrong_register
 
@@ -350,20 +357,24 @@
 .align	16
 abi_test_bad_unwind_temporary:
 .cfi_startproc
+.Labi_test_bad_unwind_temporary_seh_begin:
 	pushq	%r12
 .cfi_push	%r12
+.Labi_test_bad_unwind_temporary_seh_push_r12:
 
-	inc	%r12
-	movq	%r12, (%rsp)
-	# Unwinding from here is incorrect.
+	movq	%r12, %rax
+	inc	%rax
+	movq	%rax, (%rsp)
+	# Unwinding from here is incorrect. Although %r12 itself has not been
+	# changed, the unwind codes say to look in (%rsp) instead.
 
-	dec	%r12
 	movq	%r12, (%rsp)
 	# Unwinding is now fixed.
 
 	popq	%r12
 .cfi_pop	%r12
 	ret
+.Labi_test_bad_unwind_temporary_seh_end:
 .cfi_endproc
 .size	abi_test_bad_unwind_temporary,.-abi_test_bad_unwind_temporary
 
@@ -392,6 +403,29 @@
 ____
 
 if ($win64) {
+  $code .= <<____;
+# abi_test_bad_unwind_epilog preserves the ABI, and correctly annotates the
+# prolog, but the epilog does not match Win64's rules, breaking unwind during
+# the epilog.
+# void abi_test_bad_unwind_epilog(void);
+.type	abi_test_bad_unwind_epilog, \@abi-omnipotent
+.globl	abi_test_bad_unwind_epilog
+.align	16
+abi_test_bad_unwind_epilog:
+.Labi_test_bad_unwind_epilog_seh_begin:
+	pushq	%r12
+.Labi_test_bad_unwind_epilog_seh_push_r12:
+
+	nop
+
+	# The epilog should begin here, but the nop makes it invalid.
+	popq	%r12
+	nop
+	ret
+.Labi_test_bad_unwind_epilog_seh_end:
+.size	abi_test_bad_unwind_epilog,.-abi_test_bad_unwind_epilog
+____
+
   # Add unwind metadata for SEH.
   #
   # TODO(davidben): This is all manual right now. Once we've added SEH tests,
@@ -401,6 +435,7 @@
   # error-prone and non-standard custom handlers.
 
   # See https://docs.microsoft.com/en-us/cpp/build/struct-unwind-code?view=vs-2017
+  my $UWOP_PUSH_NONVOL = 0;
   my $UWOP_ALLOC_LARGE = 1;
   my $UWOP_ALLOC_SMALL = 2;
   my $UWOP_SAVE_NONVOL = 4;
@@ -415,7 +450,7 @@
   if ($stack_alloc_size <= 128) {
     my $info = $UWOP_ALLOC_SMALL | ((($stack_alloc_size - 8) / 8) << 4);
     $unwind_codes .= <<____;
-	.byte	.Labi_test_trampoline_prolog_alloc-.Labi_test_trampoline_begin
+	.byte	.Labi_test_trampoline_seh_prolog_alloc-.Labi_test_trampoline_seh_begin
 	.byte	$info
 ____
     $num_slots++;
@@ -424,7 +459,7 @@
     my $info = $UWOP_ALLOC_LARGE;
     my $value = $stack_alloc_size / 8;
     $unwind_codes .= <<____;
-	.byte	.Labi_test_trampoline_prolog_alloc-.Labi_test_trampoline_begin
+	.byte	.Labi_test_trampoline_seh_prolog_alloc-.Labi_test_trampoline_seh_begin
 	.byte	$info
 	.value	$value
 ____
@@ -439,7 +474,7 @@
       my $info = $UWOP_SAVE_NONVOL | ($UWOP_REG_NUMBER{$reg} << 4);
       my $value = $reg_offsets{$reg} / 8;
       $unwind_codes .= <<____;
-	.byte	.Labi_test_trampoline_prolog_$reg-.Labi_test_trampoline_begin
+	.byte	.Labi_test_trampoline_seh_prolog_$reg-.Labi_test_trampoline_seh_begin
 	.byte	$info
 	.value	$value
 ____
@@ -448,7 +483,7 @@
       my $info = $UWOP_SAVE_XMM128 | (substr($reg, 3) << 4);
       my $value = $reg_offsets{$reg} / 16;
       $unwind_codes .= <<____;
-	.byte	.Labi_test_trampoline_prolog_$reg-.Labi_test_trampoline_begin
+	.byte	.Labi_test_trampoline_seh_prolog_$reg-.Labi_test_trampoline_seh_begin
 	.byte	$info
 	.value	$value
 ____
@@ -462,19 +497,61 @@
 .section	.pdata
 .align	4
 	# https://docs.microsoft.com/en-us/cpp/build/struct-runtime-function?view=vs-2017
-	.rva	.Labi_test_trampoline_begin
-	.rva	.Labi_test_trampoline_end
-	.rva	.Labi_test_trampoline_info
+	.rva	.Labi_test_trampoline_seh_begin
+	.rva	.Labi_test_trampoline_seh_end
+	.rva	.Labi_test_trampoline_seh_info
+
+	.rva	.Labi_test_bad_unwind_wrong_register_seh_begin
+	.rva	.Labi_test_bad_unwind_wrong_register_seh_end
+	.rva	.Labi_test_bad_unwind_wrong_register_seh_info
+
+	.rva	.Labi_test_bad_unwind_temporary_seh_begin
+	.rva	.Labi_test_bad_unwind_temporary_seh_end
+	.rva	.Labi_test_bad_unwind_temporary_seh_info
+
+	.rva	.Labi_test_bad_unwind_epilog_seh_begin
+	.rva	.Labi_test_bad_unwind_epilog_seh_end
+	.rva	.Labi_test_bad_unwind_epilog_seh_info
 
 .section	.xdata
 .align	8
-.Labi_test_trampoline_info:
+.Labi_test_trampoline_seh_info:
 	# https://docs.microsoft.com/en-us/cpp/build/struct-unwind-info?view=vs-2017
 	.byte	1	# version 1, no flags
-	.byte	.Labi_test_trampoline_prolog_end-.Labi_test_trampoline_begin
+	.byte	.Labi_test_trampoline_seh_prolog_end-.Labi_test_trampoline_seh_begin
 	.byte	$num_slots
 	.byte	0	# no frame register
 $unwind_codes
+
+.align	8
+.Labi_test_bad_unwind_wrong_register_seh_info:
+	.byte	1	# version 1, no flags
+	.byte	.Labi_test_bad_unwind_wrong_register_seh_push_r13-.Labi_test_bad_unwind_wrong_register_seh_begin
+	.byte	1	# one slot
+	.byte	0	# no frame register
+
+	.byte	.Labi_test_bad_unwind_wrong_register_seh_push_r13-.Labi_test_bad_unwind_wrong_register_seh_begin
+	.byte	@{[$UWOP_PUSH_NONVOL | ($UWOP_REG_NUMBER{r13} << 4)]}
+
+.align	8
+.Labi_test_bad_unwind_temporary_seh_info:
+	.byte	1	# version 1, no flags
+	.byte	.Labi_test_bad_unwind_temporary_seh_push_r12-.Labi_test_bad_unwind_temporary_seh_begin
+	.byte	1	# one slot
+	.byte	0	# no frame register
+
+	.byte	.Labi_test_bad_unwind_temporary_seh_push_r12-.Labi_test_bad_unwind_temporary_seh_begin
+	.byte	@{[$UWOP_PUSH_NONVOL | ($UWOP_REG_NUMBER{r12} << 4)]}
+
+.align	8
+.Labi_test_bad_unwind_epilog_seh_info:
+	.byte	1	# version 1, no flags
+	.byte	.Labi_test_bad_unwind_epilog_seh_push_r12-.Labi_test_bad_unwind_epilog_seh_begin
+	.byte	1	# one slot
+	.byte	0	# no frame register
+
+	.byte	.Labi_test_bad_unwind_epilog_seh_push_r12-.Labi_test_bad_unwind_epilog_seh_begin
+	.byte	@{[$UWOP_PUSH_NONVOL | ($UWOP_REG_NUMBER{r12} << 4)]}
 ____
 }