Align perlasm SEH directives with gas/clang-assembler

perlasm broadly uses gas syntax. gas and clang-assembler already have
SEH directives. From what I can tell, no one ever properly documented
them, but the mail linked below describes them. LLVM's test data also
has examples.
https://sourceware.org/legacy-ml/binutils/2009-08/msg00193.html

First, we named ours based on the MASM directives and prepended ".seh_".
gas says "endprologue" instead of "endprolog", "savexmm" instead of
"savexmm128", and "stackalloc" instead of "allocstack".  Since perlasm
mostly looks like gas, I've switched to the gas spellings.

Second, we made .seh_endprologue implicit because it's always
immediately after the last directive. Both MASM and clang-assembler make
it explicit. Synthesizing an .seh_endprologue for those syntaxes would
require buffering up the whole function, so just require that it be
explicit in the source.

The last difference is that gas says ".seh_proc name_of_function". I've
not aligned on that one because MASM actually integrates it into the
PROC line. You add the FRAME keyword or not depending on whether it's a
frame function. To make the MASM output easier, I think we need to
diverge from both gas and what we've currently done. I'll resolve that
in a follow-up change.

Along the way, fix a couple of instances where the _CET_ENDBR got put on
the wrong side of the SEH directive. I don't think that macro works on
Windows anyway, so it's moot. But if it did emit anything, it should be
included in the prologue.

Bug: 571
Change-Id: I39701a952a654afe6bfc8b3b908ca8fe65d6f1a1
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/68292
Commit-Queue: David Benjamin <davidben@google.com>
Reviewed-by: Bob Beck <bbe@google.com>
diff --git a/crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl b/crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl
index 82e46d4..eabaa1b 100644
--- a/crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl
+++ b/crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl
@@ -474,7 +474,7 @@
 if ($win64) {
 $code.=<<___
 	lea	-0xa8(%rsp),%rsp		# 8 extra bytes to align the stack
-.seh_allocstack	0xa8
+.seh_stackalloc	0xa8
 .seh_setframe	%rbp, 0xa8+5*8
 	# Load the last two parameters. These go into %rdi and %rsi, which are
 	# non-volatile on Windows, so stash them in the parameter stack area
@@ -487,25 +487,26 @@
 	mov	0x38(%rbp), $Htable
 	# Save non-volatile XMM registers.
 	movaps	%xmm6,-0xd0(%rbp)
-.seh_savexmm128	%xmm6, 0xa8+5*8-0xd0
+.seh_savexmm	%xmm6, 0xa8+5*8-0xd0
 	movaps	%xmm7,-0xc0(%rbp)
-.seh_savexmm128	%xmm7, 0xa8+5*8-0xc0
+.seh_savexmm	%xmm7, 0xa8+5*8-0xc0
 	movaps	%xmm8,-0xb0(%rbp)
-.seh_savexmm128	%xmm8, 0xa8+5*8-0xb0
+.seh_savexmm	%xmm8, 0xa8+5*8-0xb0
 	movaps	%xmm9,-0xa0(%rbp)
-.seh_savexmm128	%xmm9, 0xa8+5*8-0xa0
+.seh_savexmm	%xmm9, 0xa8+5*8-0xa0
 	movaps	%xmm10,-0x90(%rbp)
-.seh_savexmm128	%xmm10, 0xa8+5*8-0x90
+.seh_savexmm	%xmm10, 0xa8+5*8-0x90
 	movaps	%xmm11,-0x80(%rbp)
-.seh_savexmm128	%xmm11, 0xa8+5*8-0x80
+.seh_savexmm	%xmm11, 0xa8+5*8-0x80
 	movaps	%xmm12,-0x70(%rbp)
-.seh_savexmm128	%xmm12, 0xa8+5*8-0x70
+.seh_savexmm	%xmm12, 0xa8+5*8-0x70
 	movaps	%xmm13,-0x60(%rbp)
-.seh_savexmm128	%xmm13, 0xa8+5*8-0x60
+.seh_savexmm	%xmm13, 0xa8+5*8-0x60
 	movaps	%xmm14,-0x50(%rbp)
-.seh_savexmm128	%xmm14, 0xa8+5*8-0x50
+.seh_savexmm	%xmm14, 0xa8+5*8-0x50
 	movaps	%xmm15,-0x40(%rbp)
-.seh_savexmm128	%xmm15, 0xa8+5*8-0x40
+.seh_savexmm	%xmm15, 0xa8+5*8-0x40
+.seh_endprologue
 ___
 }
 $code.=<<___;
@@ -753,7 +754,7 @@
 if ($win64) {
 $code.=<<___
 	lea	-0xa8(%rsp),%rsp		# 8 extra bytes to align the stack
-.seh_allocstack	0xa8
+.seh_stackalloc	0xa8
 .seh_setframe	%rbp, 0xa8+5*8
 	# Load the last two parameters. These go into %rdi and %rsi, which are
 	# non-volatile on Windows, so stash them in the parameter stack area
@@ -766,25 +767,26 @@
 	mov	0x38(%rbp), $Htable
 	# Save non-volatile XMM registers.
 	movaps	%xmm6,-0xd0(%rbp)
-.seh_savexmm128	%xmm6, 0xa8+5*8-0xd0
+.seh_savexmm	%xmm6, 0xa8+5*8-0xd0
 	movaps	%xmm7,-0xc0(%rbp)
-.seh_savexmm128	%xmm7, 0xa8+5*8-0xc0
+.seh_savexmm	%xmm7, 0xa8+5*8-0xc0
 	movaps	%xmm8,-0xb0(%rbp)
-.seh_savexmm128	%xmm8, 0xa8+5*8-0xb0
+.seh_savexmm	%xmm8, 0xa8+5*8-0xb0
 	movaps	%xmm9,-0xa0(%rbp)
-.seh_savexmm128	%xmm9, 0xa8+5*8-0xa0
+.seh_savexmm	%xmm9, 0xa8+5*8-0xa0
 	movaps	%xmm10,-0x90(%rbp)
-.seh_savexmm128	%xmm10, 0xa8+5*8-0x90
+.seh_savexmm	%xmm10, 0xa8+5*8-0x90
 	movaps	%xmm11,-0x80(%rbp)
-.seh_savexmm128	%xmm11, 0xa8+5*8-0x80
+.seh_savexmm	%xmm11, 0xa8+5*8-0x80
 	movaps	%xmm12,-0x70(%rbp)
-.seh_savexmm128	%xmm12, 0xa8+5*8-0x70
+.seh_savexmm	%xmm12, 0xa8+5*8-0x70
 	movaps	%xmm13,-0x60(%rbp)
-.seh_savexmm128	%xmm13, 0xa8+5*8-0x60
+.seh_savexmm	%xmm13, 0xa8+5*8-0x60
 	movaps	%xmm14,-0x50(%rbp)
-.seh_savexmm128	%xmm14, 0xa8+5*8-0x50
+.seh_savexmm	%xmm14, 0xa8+5*8-0x50
 	movaps	%xmm15,-0x40(%rbp)
-.seh_savexmm128	%xmm15, 0xa8+5*8-0x40
+.seh_savexmm	%xmm15, 0xa8+5*8-0x40
+.seh_endprologue
 ___
 }
 $code.=<<___;
diff --git a/crypto/fipsmodule/modes/asm/ghash-ssse3-x86_64.pl b/crypto/fipsmodule/modes/asm/ghash-ssse3-x86_64.pl
index 4a850f4..bdfbe04 100644
--- a/crypto/fipsmodule/modes/asm/ghash-ssse3-x86_64.pl
+++ b/crypto/fipsmodule/modes/asm/ghash-ssse3-x86_64.pl
@@ -108,11 +108,12 @@
 ____
 $code .= <<____ if ($win64);
 	subq	\$40, %rsp
-.seh_allocstack	40
+.seh_stackalloc	40
 	movdqa	%xmm6, (%rsp)
-.seh_savexmm128	%xmm6, 0
+.seh_savexmm	%xmm6, 0
 	movdqa	%xmm10, 16(%rsp)
-.seh_savexmm128	%xmm10, 16
+.seh_savexmm	%xmm10, 16
+.seh_endprologue
 ____
 $code .= <<____;
 	movdqu	($Xi), %xmm0
@@ -251,13 +252,14 @@
 ____
 $code .= <<____ if ($win64);
 	subq	\$56, %rsp
-.seh_allocstack	56
+.seh_stackalloc	56
 	movdqa	%xmm6, (%rsp)
-.seh_savexmm128	%xmm6, 0
+.seh_savexmm	%xmm6, 0
 	movdqa	%xmm10, 16(%rsp)
-.seh_savexmm128	%xmm10, 16
+.seh_savexmm	%xmm10, 16
 	movdqa	%xmm11, 32(%rsp)
-.seh_savexmm128	%xmm11, 32
+.seh_savexmm	%xmm11, 32
+.seh_endprologue
 ____
 $code .= <<____;
 	movdqu	($Xi), %xmm0
diff --git a/crypto/fipsmodule/modes/asm/ghash-x86_64.pl b/crypto/fipsmodule/modes/asm/ghash-x86_64.pl
index 33ee1cb..c026d8f 100644
--- a/crypto/fipsmodule/modes/asm/ghash-x86_64.pl
+++ b/crypto/fipsmodule/modes/asm/ghash-x86_64.pl
@@ -210,9 +210,10 @@
 ___
 $code.=<<___ if ($win64);
 	sub	\$0x18,%rsp
-.seh_allocstack	0x18
+.seh_stackalloc	0x18
 	movaps	%xmm6,(%rsp)
-.seh_savexmm128	%xmm6, 0
+.seh_savexmm	%xmm6, 0
+.seh_endprologue
 ___
 $code.=<<___;
 	movdqu		($Xip),$Hkey
@@ -347,27 +348,28 @@
 $code.=<<___ if ($win64);
 	lea	-0x88(%rsp),%rax
 	lea	-0x20(%rax),%rsp
-.seh_allocstack	0x20+0x88
+.seh_stackalloc	0x20+0x88
 	movaps	%xmm6,-0x20(%rax)
-.seh_savexmm128	%xmm6, 0x20-0x20
+.seh_savexmm	%xmm6, 0x20-0x20
 	movaps	%xmm7,-0x10(%rax)
-.seh_savexmm128	%xmm7, 0x20-0x10
+.seh_savexmm	%xmm7, 0x20-0x10
 	movaps	%xmm8,0(%rax)
-.seh_savexmm128	%xmm8, 0x20+0
+.seh_savexmm	%xmm8, 0x20+0
 	movaps	%xmm9,0x10(%rax)
-.seh_savexmm128	%xmm9, 0x20+0x10
+.seh_savexmm	%xmm9, 0x20+0x10
 	movaps	%xmm10,0x20(%rax)
-.seh_savexmm128	%xmm10, 0x20+0x20
+.seh_savexmm	%xmm10, 0x20+0x20
 	movaps	%xmm11,0x30(%rax)
-.seh_savexmm128	%xmm11, 0x20+0x30
+.seh_savexmm	%xmm11, 0x20+0x30
 	movaps	%xmm12,0x40(%rax)
-.seh_savexmm128	%xmm12, 0x20+0x40
+.seh_savexmm	%xmm12, 0x20+0x40
 	movaps	%xmm13,0x50(%rax)
-.seh_savexmm128	%xmm13, 0x20+0x50
+.seh_savexmm	%xmm13, 0x20+0x50
 	movaps	%xmm14,0x60(%rax)
-.seh_savexmm128	%xmm14, 0x20+0x60
+.seh_savexmm	%xmm14, 0x20+0x60
 	movaps	%xmm15,0x70(%rax)
-.seh_savexmm128	%xmm15, 0x20+0x70
+.seh_savexmm	%xmm15, 0x20+0x70
+.seh_endprologue
 ___
 $code.=<<___;
 	movdqa		.Lbswap_mask(%rip),$T3
@@ -704,6 +706,7 @@
 .align	32
 gcm_init_avx:
 .cfi_startproc
+.seh_startproc
 	_CET_ENDBR
 ___
 if ($avx) {
@@ -711,11 +714,11 @@
 my $HK="%xmm6";
 
 $code.=<<___ if ($win64);
-.seh_startproc
 	sub	\$0x18,%rsp
-.seh_allocstack	0x18
+.seh_stackalloc	0x18
 	movaps	%xmm6,(%rsp)
-.seh_savexmm128	%xmm6, 0
+.seh_savexmm	%xmm6, 0
+.seh_endprologue
 ___
 $code.=<<___;
 	vzeroupper
@@ -862,6 +865,7 @@
 .align	32
 gcm_ghash_avx:
 .cfi_startproc
+.seh_startproc
 	_CET_ENDBR
 ___
 if ($avx) {
@@ -872,30 +876,30 @@
     $Xi,$Xo,$Tred,$bswap,$Ii,$Ij) = map("%xmm$_",(0..15));
 
 $code.=<<___ if ($win64);
-.seh_startproc
 	lea	-0x88(%rsp),%rax
 	lea	-0x20(%rax),%rsp
-.seh_allocstack	0x20+0x88
+.seh_stackalloc	0x20+0x88
 	movaps	%xmm6,-0x20(%rax)
-.seh_savexmm128	%xmm6, 0x20-0x20
+.seh_savexmm	%xmm6, 0x20-0x20
 	movaps	%xmm7,-0x10(%rax)
-.seh_savexmm128	%xmm7, 0x20-0x10
+.seh_savexmm	%xmm7, 0x20-0x10
 	movaps	%xmm8,0(%rax)
-.seh_savexmm128	%xmm8, 0x20+0
+.seh_savexmm	%xmm8, 0x20+0
 	movaps	%xmm9,0x10(%rax)
-.seh_savexmm128	%xmm9, 0x20+0x10
+.seh_savexmm	%xmm9, 0x20+0x10
 	movaps	%xmm10,0x20(%rax)
-.seh_savexmm128	%xmm10, 0x20+0x20
+.seh_savexmm	%xmm10, 0x20+0x20
 	movaps	%xmm11,0x30(%rax)
-.seh_savexmm128	%xmm11, 0x20+0x30
+.seh_savexmm	%xmm11, 0x20+0x30
 	movaps	%xmm12,0x40(%rax)
-.seh_savexmm128	%xmm12, 0x20+0x40
+.seh_savexmm	%xmm12, 0x20+0x40
 	movaps	%xmm13,0x50(%rax)
-.seh_savexmm128	%xmm13, 0x20+0x50
+.seh_savexmm	%xmm13, 0x20+0x50
 	movaps	%xmm14,0x60(%rax)
-.seh_savexmm128	%xmm14, 0x20+0x60
+.seh_savexmm	%xmm14, 0x20+0x60
 	movaps	%xmm15,0x70(%rax)
-.seh_savexmm128	%xmm15, 0x20+0x70
+.seh_savexmm	%xmm15, 0x20+0x70
+.seh_endprologue
 ___
 $code.=<<___;
 	vzeroupper
diff --git a/crypto/perlasm/x86_64-xlate.pl b/crypto/perlasm/x86_64-xlate.pl
index 9fb1de1..5b7705d 100755
--- a/crypto/perlasm/x86_64-xlate.pl
+++ b/crypto/perlasm/x86_64-xlate.pl
@@ -714,16 +714,21 @@
     }
 }
 { package seh_directive;
-    # This implements directives, like MASM's, for specifying Windows unwind
-    # codes. See https://learn.microsoft.com/en-us/cpp/build/exception-handling-x64?view=msvc-170
-    # for details on the Windows unwind mechanism. Unlike MASM's directives, we
-    # have no .seh_endprolog directive. Instead, the last prolog directive is
-    # implicitly the end of the prolog.
+    # This implements directives, like MASM, gas, and clang-assembler for
+    # specifying Windows unwind codes. See
+    # https://learn.microsoft.com/en-us/cpp/build/exception-handling-x64?view=msvc-170
+    # for details on the Windows unwind mechanism. As perlasm generally uses gas
+    # syntax, the syntax is patterned after the gas spelling, described in
+    # https://sourceware.org/legacy-ml/binutils/2009-08/msg00193.html
+    #
+    # TODO(https://crbug.com/boringssl/571): Translate to the MASM directives
+    # when using the MASM output. Emit as-is when using "mingw64" output, which
+    # is Windows with gas syntax.
     #
     # TODO(https://crbug.com/boringssl/259): For now, SEH directives are ignored
     # on non-Windows platforms. This means functions need to specify both CFI
     # and SEH directives, often redundantly. Ideally we'd abstract between the
-    # two. E.g., we can synthesize CFI from SEH prologs, but SEH does not
+    # two. E.g., we can synthesize CFI from SEH prologues, but SEH does not
     # annotate epilogs, so we'd need to combine parts from both. Or we can
     # restrict ourselves to a subset of CFI and synthesize SEH from CFI.
     #
@@ -732,7 +737,7 @@
     # complication is the current scheme modifies RDI and RSI (non-volatile on
     # Windows) at the start of the function, and saves them in the parameter
     # stack area. This can be expressed with .seh_savereg, but .seh_savereg is
-    # only usable late in the prolog. However, unwind information gives enough
+    # only usable late in the prologue. However, unwind information gives enough
     # information to locate the parameter stack area at any point in the
     # function, so we can defer conversion or implement other schemes.
 
@@ -777,6 +782,11 @@
 	die "Missing .seh_startproc directive" unless %info;
     }
 
+    sub _check_in_prologue {
+	_check_in_proc();
+	die "Invalid SEH directive after .seh_endprologue" if defined($info{endprologue});
+    }
+
     sub _check_not_in_proc {
 	die "Missing .seh_endproc directive" if %info;
     }
@@ -794,8 +804,8 @@
 	    info_label => $info_label,
 	    # start_label is the start of the function.
 	    start_label => $start_label,
-	    # endprolog is the label of the last unwind code in the function.
-	    endprolog => $start_label,
+	    # endprologue is the label of the end of the prologue.
+	    endprologue => undef,
 	    # unwind_codes contains the textual representation of the
 	    # unwind codes in the function so far.
 	    unwind_codes => "",
@@ -821,14 +831,14 @@
 
     sub _add_unwind_code {
 	my ($op, $value, @extra) = @_;
-	_check_in_proc();
+	_check_in_prologue();
 	if ($op != $UWOP_PUSH_NONVOL) {
 	    $info{has_nonpushreg} = 1;
 	} elsif ($info{has_nonpushreg}) {
-	    die ".seh_pushreg directives must appear first in the prolog";
+	    die ".seh_pushreg directives must appear first in the prologue";
 	}
 
-	my $label = _new_unwind_label("prolog");
+	my $label = _new_unwind_label("prologue");
 	# Encode an UNWIND_CODE structure. See
 	# https://learn.microsoft.com/en-us/cpp/build/exception-handling-x64?view=msvc-170#struct-unwind_code
 	my $encoded = $op | ($value << 4);
@@ -844,17 +854,13 @@
 	$info{num_codes} += 1 + scalar(@extra);
 	# Unwind codes are listed in reverse order.
 	$info{unwind_codes} = $codes . $info{unwind_codes};
-	# Track the label of the last unwind code. It implicitly is the end of
-	# the prolog. MASM has an endprolog directive, but it seems to be
-	# unnecessary.
-	$info{endprolog} = $label;
 	return $label;
     }
 
     sub _updating_fixed_allocation {
-	_check_in_proc();
+	_check_in_prologue();
 	if ($info{frame_reg} != 0) {
-	    # Windows documentation does not explicitly forbid .seh_allocstack
+	    # Windows documentation does not explicitly forbid .seh_stackalloc
 	    # after .seh_setframe, but it appears to have no effect. Offsets are
 	    # still relative to the fixed allocation when the frame register was
 	    # established.
@@ -862,7 +868,7 @@
 	}
 	if ($info{has_offset}) {
 	    # Windows documentation does not explicitly forbid .seh_savereg
-	    # before .seh_allocstack, but it does not work very well. Offsets
+	    # before .seh_stackalloc, but it does not work very well. Offsets
 	    # are relative to the top of the final fixed allocation, not where
 	    # RSP currently is.
 	    die "directives with an offset must come after the fixed allocation is established.";
@@ -871,11 +877,8 @@
 
     sub _endproc {
 	_check_in_proc();
-	if ($info{num_codes} == 0) {
-	    # If a Windows function has no directives (i.e. it doesn't touch the
-	    # stack), it is a leaf function and is not expected to appear in
-	    # .pdata or .xdata.
-	    die ".seh_endproc found with no unwind codes";
+	if (!defined($info{endprologue})) {
+	    die "Missing .seh_endprologue";
 	}
 
 	my $end_label = _new_unwind_label("end");
@@ -894,7 +897,7 @@
 	$xdata .= <<____;
 $info{info_label}:
 	.byte	1	# version 1, no flags
-	.byte	$info{endprolog}-$info{start_label}
+	.byte	$info{endprologue}-$info{start_label}
 	.byte	$info{num_codes}
 	.byte	$frame_encoded
 $info{unwind_codes}
@@ -916,7 +919,7 @@
 	    my $label;
 	    SWITCH: for ($dir) {
 		/^startproc$/ && do {
-		    $label = _startproc();
+		    $label = _startproc($1);
 		    last;
 		};
 		/^pushreg$/ && do {
@@ -926,7 +929,7 @@
 		    $label = _add_unwind_code($UWOP_PUSH_NONVOL, $reg_num);
 		    last;
 		};
-		/^allocstack$/ && do {
+		/^stackalloc$/ && do {
 		    my $num = eval($$line);
 		    if ($num <= 0 || $num % 8 != 0) {
 			die "invalid stack allocation: $num";
@@ -976,7 +979,7 @@
 		    $info{has_offset} = 1;
 		    last;
 		};
-		/^savexmm128$/ && do {
+		/^savexmm$/ && do {
 		    $$line =~ /%xmm(\d+)\s*,\s*(.+)/ or die "could not parse .seh_$dir";
 		    my $reg_num = $1;
 		    my $offset = eval($2);
@@ -991,6 +994,19 @@
 		    $info{has_offset} = 1;
 		    last;
 		};
+		/^endprologue$/ && do {
+		    _check_in_prologue();
+		    if ($info{num_codes} == 0) {
+			# If a Windows function has no directives (i.e. it
+			# doesn't touch the stack), it is a leaf function and is
+			# not expected to appear in .pdata or .xdata.
+			die ".seh_endprologue found with no unwind codes";
+		    }
+
+		    $label = _new_unwind_label("endprologue");
+		    $info{endprologue} = $label;
+		    last;
+		};
 		/^endproc$/ && do {
 		    $label = _endproc();
 		    last;
diff --git a/crypto/test/asm/trampoline-x86_64.pl b/crypto/test/asm/trampoline-x86_64.pl
index 53b4bcd..d8d2be2 100755
--- a/crypto/test/asm/trampoline-x86_64.pl
+++ b/crypto/test/asm/trampoline-x86_64.pl
@@ -179,7 +179,7 @@
 $code .= <<____;
 	subq	\$$stack_alloc_size, %rsp
 .cfi_adjust_cfa_offset	$stack_alloc_size
-.seh_allocstack	$stack_alloc_size
+.seh_stackalloc	$stack_alloc_size
 ____
 $code .= <<____ if (!$win64);
 	movq	$unwind, $unwind_offset(%rsp)
@@ -195,12 +195,13 @@
   # pointer just before the call.
   my $cfi_off = $off - $stack_alloc_size - 8;
   my $seh_dir = ".seh_savereg";
-  $seh_dir = ".seh_savexmm128" if ($reg =~ /^xmm/);
+  $seh_dir = ".seh_savexmm" if ($reg =~ /^xmm/);
   return <<____;
 .cfi_offset	$reg, $cfi_off
 $seh_dir	\%$reg, $off
 ____
 });
+$code .= ".seh_endprologue\n";
 
 $code .= load_caller_state(0, $state);
 $code .= <<____;
@@ -342,6 +343,7 @@
 	pushq	%r12
 .cfi_push	%r13	# This should be %r13
 .seh_pushreg	%r13	# This should be %r13
+.seh_endprologue
 	# Windows evaluates epilogs directly in the unwinder, rather than using
 	# unwind codes. Add a nop so there is one non-epilog point (immediately
 	# before the nop) where the unwinder can observe the mistake.
@@ -366,6 +368,7 @@
 	pushq	%r12
 .cfi_push	%r12
 .seh_pushreg	%r12
+.seh_endprologue
 
 	movq	%r12, %rax
 	inc	%rax
@@ -422,6 +425,7 @@
 .seh_startproc
 	pushq	%r12
 .seh_pushreg	%r12
+.seh_endprologue
 
 	nop
 
diff --git a/gen/bcm/aesni-gcm-x86_64-win.asm b/gen/bcm/aesni-gcm-x86_64-win.asm
index d7a2665..7564a1c 100644
--- a/gen/bcm/aesni-gcm-x86_64-win.asm
+++ b/gen/bcm/aesni-gcm-x86_64-win.asm
@@ -363,57 +363,58 @@
 
 	push	rbp
 
-$L$SEH_prolog_aesni_gcm_decrypt_2:
+$L$SEH_prologue_aesni_gcm_decrypt_2:
 	mov	rbp,rsp
 
 	push	rbx
 
-$L$SEH_prolog_aesni_gcm_decrypt_3:
+$L$SEH_prologue_aesni_gcm_decrypt_3:
 	push	r12
 
-$L$SEH_prolog_aesni_gcm_decrypt_4:
+$L$SEH_prologue_aesni_gcm_decrypt_4:
 	push	r13
 
-$L$SEH_prolog_aesni_gcm_decrypt_5:
+$L$SEH_prologue_aesni_gcm_decrypt_5:
 	push	r14
 
-$L$SEH_prolog_aesni_gcm_decrypt_6:
+$L$SEH_prologue_aesni_gcm_decrypt_6:
 	push	r15
 
-$L$SEH_prolog_aesni_gcm_decrypt_7:
+$L$SEH_prologue_aesni_gcm_decrypt_7:
 	lea	rsp,[((-168))+rsp]
-$L$SEH_prolog_aesni_gcm_decrypt_8:
-$L$SEH_prolog_aesni_gcm_decrypt_9:
+$L$SEH_prologue_aesni_gcm_decrypt_8:
+$L$SEH_prologue_aesni_gcm_decrypt_9:
 
 
 
 	mov	QWORD[16+rbp],rdi
-$L$SEH_prolog_aesni_gcm_decrypt_10:
+$L$SEH_prologue_aesni_gcm_decrypt_10:
 	mov	QWORD[24+rbp],rsi
-$L$SEH_prolog_aesni_gcm_decrypt_11:
+$L$SEH_prologue_aesni_gcm_decrypt_11:
 	mov	rdi,QWORD[48+rbp]
 	mov	rsi,QWORD[56+rbp]
 
 	movaps	XMMWORD[(-208)+rbp],xmm6
-$L$SEH_prolog_aesni_gcm_decrypt_12:
+$L$SEH_prologue_aesni_gcm_decrypt_12:
 	movaps	XMMWORD[(-192)+rbp],xmm7
-$L$SEH_prolog_aesni_gcm_decrypt_13:
+$L$SEH_prologue_aesni_gcm_decrypt_13:
 	movaps	XMMWORD[(-176)+rbp],xmm8
-$L$SEH_prolog_aesni_gcm_decrypt_14:
+$L$SEH_prologue_aesni_gcm_decrypt_14:
 	movaps	XMMWORD[(-160)+rbp],xmm9
-$L$SEH_prolog_aesni_gcm_decrypt_15:
+$L$SEH_prologue_aesni_gcm_decrypt_15:
 	movaps	XMMWORD[(-144)+rbp],xmm10
-$L$SEH_prolog_aesni_gcm_decrypt_16:
+$L$SEH_prologue_aesni_gcm_decrypt_16:
 	movaps	XMMWORD[(-128)+rbp],xmm11
-$L$SEH_prolog_aesni_gcm_decrypt_17:
+$L$SEH_prologue_aesni_gcm_decrypt_17:
 	movaps	XMMWORD[(-112)+rbp],xmm12
-$L$SEH_prolog_aesni_gcm_decrypt_18:
+$L$SEH_prologue_aesni_gcm_decrypt_18:
 	movaps	XMMWORD[(-96)+rbp],xmm13
-$L$SEH_prolog_aesni_gcm_decrypt_19:
+$L$SEH_prologue_aesni_gcm_decrypt_19:
 	movaps	XMMWORD[(-80)+rbp],xmm14
-$L$SEH_prolog_aesni_gcm_decrypt_20:
+$L$SEH_prologue_aesni_gcm_decrypt_20:
 	movaps	XMMWORD[(-64)+rbp],xmm15
-$L$SEH_prolog_aesni_gcm_decrypt_21:
+$L$SEH_prologue_aesni_gcm_decrypt_21:
+$L$SEH_endprologue_aesni_gcm_decrypt_22:
 	vzeroupper
 
 	mov	r12,QWORD[64+rbp]
@@ -512,7 +513,7 @@
 
 $L$gcm_dec_abort:
 	ret
-$L$SEH_end_aesni_gcm_decrypt_22:
+$L$SEH_end_aesni_gcm_decrypt_23:
 
 
 
@@ -629,57 +630,58 @@
 
 	push	rbp
 
-$L$SEH_prolog_aesni_gcm_encrypt_2:
+$L$SEH_prologue_aesni_gcm_encrypt_2:
 	mov	rbp,rsp
 
 	push	rbx
 
-$L$SEH_prolog_aesni_gcm_encrypt_3:
+$L$SEH_prologue_aesni_gcm_encrypt_3:
 	push	r12
 
-$L$SEH_prolog_aesni_gcm_encrypt_4:
+$L$SEH_prologue_aesni_gcm_encrypt_4:
 	push	r13
 
-$L$SEH_prolog_aesni_gcm_encrypt_5:
+$L$SEH_prologue_aesni_gcm_encrypt_5:
 	push	r14
 
-$L$SEH_prolog_aesni_gcm_encrypt_6:
+$L$SEH_prologue_aesni_gcm_encrypt_6:
 	push	r15
 
-$L$SEH_prolog_aesni_gcm_encrypt_7:
+$L$SEH_prologue_aesni_gcm_encrypt_7:
 	lea	rsp,[((-168))+rsp]
-$L$SEH_prolog_aesni_gcm_encrypt_8:
-$L$SEH_prolog_aesni_gcm_encrypt_9:
+$L$SEH_prologue_aesni_gcm_encrypt_8:
+$L$SEH_prologue_aesni_gcm_encrypt_9:
 
 
 
 	mov	QWORD[16+rbp],rdi
-$L$SEH_prolog_aesni_gcm_encrypt_10:
+$L$SEH_prologue_aesni_gcm_encrypt_10:
 	mov	QWORD[24+rbp],rsi
-$L$SEH_prolog_aesni_gcm_encrypt_11:
+$L$SEH_prologue_aesni_gcm_encrypt_11:
 	mov	rdi,QWORD[48+rbp]
 	mov	rsi,QWORD[56+rbp]
 
 	movaps	XMMWORD[(-208)+rbp],xmm6
-$L$SEH_prolog_aesni_gcm_encrypt_12:
+$L$SEH_prologue_aesni_gcm_encrypt_12:
 	movaps	XMMWORD[(-192)+rbp],xmm7
-$L$SEH_prolog_aesni_gcm_encrypt_13:
+$L$SEH_prologue_aesni_gcm_encrypt_13:
 	movaps	XMMWORD[(-176)+rbp],xmm8
-$L$SEH_prolog_aesni_gcm_encrypt_14:
+$L$SEH_prologue_aesni_gcm_encrypt_14:
 	movaps	XMMWORD[(-160)+rbp],xmm9
-$L$SEH_prolog_aesni_gcm_encrypt_15:
+$L$SEH_prologue_aesni_gcm_encrypt_15:
 	movaps	XMMWORD[(-144)+rbp],xmm10
-$L$SEH_prolog_aesni_gcm_encrypt_16:
+$L$SEH_prologue_aesni_gcm_encrypt_16:
 	movaps	XMMWORD[(-128)+rbp],xmm11
-$L$SEH_prolog_aesni_gcm_encrypt_17:
+$L$SEH_prologue_aesni_gcm_encrypt_17:
 	movaps	XMMWORD[(-112)+rbp],xmm12
-$L$SEH_prolog_aesni_gcm_encrypt_18:
+$L$SEH_prologue_aesni_gcm_encrypt_18:
 	movaps	XMMWORD[(-96)+rbp],xmm13
-$L$SEH_prolog_aesni_gcm_encrypt_19:
+$L$SEH_prologue_aesni_gcm_encrypt_19:
 	movaps	XMMWORD[(-80)+rbp],xmm14
-$L$SEH_prolog_aesni_gcm_encrypt_20:
+$L$SEH_prologue_aesni_gcm_encrypt_20:
 	movaps	XMMWORD[(-64)+rbp],xmm15
-$L$SEH_prolog_aesni_gcm_encrypt_21:
+$L$SEH_prologue_aesni_gcm_encrypt_21:
+$L$SEH_endprologue_aesni_gcm_encrypt_22:
 	vzeroupper
 
 	vmovdqu	xmm1,XMMWORD[rdi]
@@ -943,7 +945,7 @@
 
 $L$gcm_enc_abort:
 	ret
-$L$SEH_end_aesni_gcm_encrypt_22:
+$L$SEH_end_aesni_gcm_encrypt_23:
 
 
 section	.rdata rdata align=8
@@ -968,11 +970,11 @@
 section	.pdata rdata align=4
 ALIGN	4
 	DD	$L$SEH_begin_aesni_gcm_decrypt_1 wrt ..imagebase
-	DD	$L$SEH_end_aesni_gcm_decrypt_22 wrt ..imagebase
+	DD	$L$SEH_end_aesni_gcm_decrypt_23 wrt ..imagebase
 	DD	$L$SEH_info_aesni_gcm_decrypt_0 wrt ..imagebase
 
 	DD	$L$SEH_begin_aesni_gcm_encrypt_1 wrt ..imagebase
-	DD	$L$SEH_end_aesni_gcm_encrypt_22 wrt ..imagebase
+	DD	$L$SEH_end_aesni_gcm_encrypt_23 wrt ..imagebase
 	DD	$L$SEH_info_aesni_gcm_encrypt_0 wrt ..imagebase
 
 
@@ -980,120 +982,120 @@
 ALIGN	4
 $L$SEH_info_aesni_gcm_decrypt_0:
 	DB	1
-	DB	$L$SEH_prolog_aesni_gcm_decrypt_21-$L$SEH_begin_aesni_gcm_decrypt_1
+	DB	$L$SEH_endprologue_aesni_gcm_decrypt_22-$L$SEH_begin_aesni_gcm_decrypt_1
 	DB	33
 	DB	213
-	DB	$L$SEH_prolog_aesni_gcm_decrypt_21-$L$SEH_begin_aesni_gcm_decrypt_1
+	DB	$L$SEH_prologue_aesni_gcm_decrypt_21-$L$SEH_begin_aesni_gcm_decrypt_1
 	DB	248
 	DW	9
-	DB	$L$SEH_prolog_aesni_gcm_decrypt_20-$L$SEH_begin_aesni_gcm_decrypt_1
+	DB	$L$SEH_prologue_aesni_gcm_decrypt_20-$L$SEH_begin_aesni_gcm_decrypt_1
 	DB	232
 	DW	8
-	DB	$L$SEH_prolog_aesni_gcm_decrypt_19-$L$SEH_begin_aesni_gcm_decrypt_1
+	DB	$L$SEH_prologue_aesni_gcm_decrypt_19-$L$SEH_begin_aesni_gcm_decrypt_1
 	DB	216
 	DW	7
-	DB	$L$SEH_prolog_aesni_gcm_decrypt_18-$L$SEH_begin_aesni_gcm_decrypt_1
+	DB	$L$SEH_prologue_aesni_gcm_decrypt_18-$L$SEH_begin_aesni_gcm_decrypt_1
 	DB	200
 	DW	6
-	DB	$L$SEH_prolog_aesni_gcm_decrypt_17-$L$SEH_begin_aesni_gcm_decrypt_1
+	DB	$L$SEH_prologue_aesni_gcm_decrypt_17-$L$SEH_begin_aesni_gcm_decrypt_1
 	DB	184
 	DW	5
-	DB	$L$SEH_prolog_aesni_gcm_decrypt_16-$L$SEH_begin_aesni_gcm_decrypt_1
+	DB	$L$SEH_prologue_aesni_gcm_decrypt_16-$L$SEH_begin_aesni_gcm_decrypt_1
 	DB	168
 	DW	4
-	DB	$L$SEH_prolog_aesni_gcm_decrypt_15-$L$SEH_begin_aesni_gcm_decrypt_1
+	DB	$L$SEH_prologue_aesni_gcm_decrypt_15-$L$SEH_begin_aesni_gcm_decrypt_1
 	DB	152
 	DW	3
-	DB	$L$SEH_prolog_aesni_gcm_decrypt_14-$L$SEH_begin_aesni_gcm_decrypt_1
+	DB	$L$SEH_prologue_aesni_gcm_decrypt_14-$L$SEH_begin_aesni_gcm_decrypt_1
 	DB	136
 	DW	2
-	DB	$L$SEH_prolog_aesni_gcm_decrypt_13-$L$SEH_begin_aesni_gcm_decrypt_1
+	DB	$L$SEH_prologue_aesni_gcm_decrypt_13-$L$SEH_begin_aesni_gcm_decrypt_1
 	DB	120
 	DW	1
-	DB	$L$SEH_prolog_aesni_gcm_decrypt_12-$L$SEH_begin_aesni_gcm_decrypt_1
+	DB	$L$SEH_prologue_aesni_gcm_decrypt_12-$L$SEH_begin_aesni_gcm_decrypt_1
 	DB	104
 	DW	0
-	DB	$L$SEH_prolog_aesni_gcm_decrypt_11-$L$SEH_begin_aesni_gcm_decrypt_1
+	DB	$L$SEH_prologue_aesni_gcm_decrypt_11-$L$SEH_begin_aesni_gcm_decrypt_1
 	DB	100
 	DW	29
-	DB	$L$SEH_prolog_aesni_gcm_decrypt_10-$L$SEH_begin_aesni_gcm_decrypt_1
+	DB	$L$SEH_prologue_aesni_gcm_decrypt_10-$L$SEH_begin_aesni_gcm_decrypt_1
 	DB	116
 	DW	28
-	DB	$L$SEH_prolog_aesni_gcm_decrypt_9-$L$SEH_begin_aesni_gcm_decrypt_1
+	DB	$L$SEH_prologue_aesni_gcm_decrypt_9-$L$SEH_begin_aesni_gcm_decrypt_1
 	DB	3
-	DB	$L$SEH_prolog_aesni_gcm_decrypt_8-$L$SEH_begin_aesni_gcm_decrypt_1
+	DB	$L$SEH_prologue_aesni_gcm_decrypt_8-$L$SEH_begin_aesni_gcm_decrypt_1
 	DB	1
 	DW	21
-	DB	$L$SEH_prolog_aesni_gcm_decrypt_7-$L$SEH_begin_aesni_gcm_decrypt_1
+	DB	$L$SEH_prologue_aesni_gcm_decrypt_7-$L$SEH_begin_aesni_gcm_decrypt_1
 	DB	240
-	DB	$L$SEH_prolog_aesni_gcm_decrypt_6-$L$SEH_begin_aesni_gcm_decrypt_1
+	DB	$L$SEH_prologue_aesni_gcm_decrypt_6-$L$SEH_begin_aesni_gcm_decrypt_1
 	DB	224
-	DB	$L$SEH_prolog_aesni_gcm_decrypt_5-$L$SEH_begin_aesni_gcm_decrypt_1
+	DB	$L$SEH_prologue_aesni_gcm_decrypt_5-$L$SEH_begin_aesni_gcm_decrypt_1
 	DB	208
-	DB	$L$SEH_prolog_aesni_gcm_decrypt_4-$L$SEH_begin_aesni_gcm_decrypt_1
+	DB	$L$SEH_prologue_aesni_gcm_decrypt_4-$L$SEH_begin_aesni_gcm_decrypt_1
 	DB	192
-	DB	$L$SEH_prolog_aesni_gcm_decrypt_3-$L$SEH_begin_aesni_gcm_decrypt_1
+	DB	$L$SEH_prologue_aesni_gcm_decrypt_3-$L$SEH_begin_aesni_gcm_decrypt_1
 	DB	48
-	DB	$L$SEH_prolog_aesni_gcm_decrypt_2-$L$SEH_begin_aesni_gcm_decrypt_1
+	DB	$L$SEH_prologue_aesni_gcm_decrypt_2-$L$SEH_begin_aesni_gcm_decrypt_1
 	DB	80
 
 $L$SEH_info_aesni_gcm_encrypt_0:
 	DB	1
-	DB	$L$SEH_prolog_aesni_gcm_encrypt_21-$L$SEH_begin_aesni_gcm_encrypt_1
+	DB	$L$SEH_endprologue_aesni_gcm_encrypt_22-$L$SEH_begin_aesni_gcm_encrypt_1
 	DB	33
 	DB	213
-	DB	$L$SEH_prolog_aesni_gcm_encrypt_21-$L$SEH_begin_aesni_gcm_encrypt_1
+	DB	$L$SEH_prologue_aesni_gcm_encrypt_21-$L$SEH_begin_aesni_gcm_encrypt_1
 	DB	248
 	DW	9
-	DB	$L$SEH_prolog_aesni_gcm_encrypt_20-$L$SEH_begin_aesni_gcm_encrypt_1
+	DB	$L$SEH_prologue_aesni_gcm_encrypt_20-$L$SEH_begin_aesni_gcm_encrypt_1
 	DB	232
 	DW	8
-	DB	$L$SEH_prolog_aesni_gcm_encrypt_19-$L$SEH_begin_aesni_gcm_encrypt_1
+	DB	$L$SEH_prologue_aesni_gcm_encrypt_19-$L$SEH_begin_aesni_gcm_encrypt_1
 	DB	216
 	DW	7
-	DB	$L$SEH_prolog_aesni_gcm_encrypt_18-$L$SEH_begin_aesni_gcm_encrypt_1
+	DB	$L$SEH_prologue_aesni_gcm_encrypt_18-$L$SEH_begin_aesni_gcm_encrypt_1
 	DB	200
 	DW	6
-	DB	$L$SEH_prolog_aesni_gcm_encrypt_17-$L$SEH_begin_aesni_gcm_encrypt_1
+	DB	$L$SEH_prologue_aesni_gcm_encrypt_17-$L$SEH_begin_aesni_gcm_encrypt_1
 	DB	184
 	DW	5
-	DB	$L$SEH_prolog_aesni_gcm_encrypt_16-$L$SEH_begin_aesni_gcm_encrypt_1
+	DB	$L$SEH_prologue_aesni_gcm_encrypt_16-$L$SEH_begin_aesni_gcm_encrypt_1
 	DB	168
 	DW	4
-	DB	$L$SEH_prolog_aesni_gcm_encrypt_15-$L$SEH_begin_aesni_gcm_encrypt_1
+	DB	$L$SEH_prologue_aesni_gcm_encrypt_15-$L$SEH_begin_aesni_gcm_encrypt_1
 	DB	152
 	DW	3
-	DB	$L$SEH_prolog_aesni_gcm_encrypt_14-$L$SEH_begin_aesni_gcm_encrypt_1
+	DB	$L$SEH_prologue_aesni_gcm_encrypt_14-$L$SEH_begin_aesni_gcm_encrypt_1
 	DB	136
 	DW	2
-	DB	$L$SEH_prolog_aesni_gcm_encrypt_13-$L$SEH_begin_aesni_gcm_encrypt_1
+	DB	$L$SEH_prologue_aesni_gcm_encrypt_13-$L$SEH_begin_aesni_gcm_encrypt_1
 	DB	120
 	DW	1
-	DB	$L$SEH_prolog_aesni_gcm_encrypt_12-$L$SEH_begin_aesni_gcm_encrypt_1
+	DB	$L$SEH_prologue_aesni_gcm_encrypt_12-$L$SEH_begin_aesni_gcm_encrypt_1
 	DB	104
 	DW	0
-	DB	$L$SEH_prolog_aesni_gcm_encrypt_11-$L$SEH_begin_aesni_gcm_encrypt_1
+	DB	$L$SEH_prologue_aesni_gcm_encrypt_11-$L$SEH_begin_aesni_gcm_encrypt_1
 	DB	100
 	DW	29
-	DB	$L$SEH_prolog_aesni_gcm_encrypt_10-$L$SEH_begin_aesni_gcm_encrypt_1
+	DB	$L$SEH_prologue_aesni_gcm_encrypt_10-$L$SEH_begin_aesni_gcm_encrypt_1
 	DB	116
 	DW	28
-	DB	$L$SEH_prolog_aesni_gcm_encrypt_9-$L$SEH_begin_aesni_gcm_encrypt_1
+	DB	$L$SEH_prologue_aesni_gcm_encrypt_9-$L$SEH_begin_aesni_gcm_encrypt_1
 	DB	3
-	DB	$L$SEH_prolog_aesni_gcm_encrypt_8-$L$SEH_begin_aesni_gcm_encrypt_1
+	DB	$L$SEH_prologue_aesni_gcm_encrypt_8-$L$SEH_begin_aesni_gcm_encrypt_1
 	DB	1
 	DW	21
-	DB	$L$SEH_prolog_aesni_gcm_encrypt_7-$L$SEH_begin_aesni_gcm_encrypt_1
+	DB	$L$SEH_prologue_aesni_gcm_encrypt_7-$L$SEH_begin_aesni_gcm_encrypt_1
 	DB	240
-	DB	$L$SEH_prolog_aesni_gcm_encrypt_6-$L$SEH_begin_aesni_gcm_encrypt_1
+	DB	$L$SEH_prologue_aesni_gcm_encrypt_6-$L$SEH_begin_aesni_gcm_encrypt_1
 	DB	224
-	DB	$L$SEH_prolog_aesni_gcm_encrypt_5-$L$SEH_begin_aesni_gcm_encrypt_1
+	DB	$L$SEH_prologue_aesni_gcm_encrypt_5-$L$SEH_begin_aesni_gcm_encrypt_1
 	DB	208
-	DB	$L$SEH_prolog_aesni_gcm_encrypt_4-$L$SEH_begin_aesni_gcm_encrypt_1
+	DB	$L$SEH_prologue_aesni_gcm_encrypt_4-$L$SEH_begin_aesni_gcm_encrypt_1
 	DB	192
-	DB	$L$SEH_prolog_aesni_gcm_encrypt_3-$L$SEH_begin_aesni_gcm_encrypt_1
+	DB	$L$SEH_prologue_aesni_gcm_encrypt_3-$L$SEH_begin_aesni_gcm_encrypt_1
 	DB	48
-	DB	$L$SEH_prolog_aesni_gcm_encrypt_2-$L$SEH_begin_aesni_gcm_encrypt_1
+	DB	$L$SEH_prologue_aesni_gcm_encrypt_2-$L$SEH_begin_aesni_gcm_encrypt_1
 	DB	80
 %else
 ; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738
diff --git a/gen/bcm/ghash-ssse3-x86_64-win.asm b/gen/bcm/ghash-ssse3-x86_64-win.asm
index 84c5d40..a8be60e 100644
--- a/gen/bcm/ghash-ssse3-x86_64-win.asm
+++ b/gen/bcm/ghash-ssse3-x86_64-win.asm
@@ -26,11 +26,12 @@
 $L$SEH_begin_gcm_gmult_ssse3_1:
 _CET_ENDBR
 	sub	rsp,40
-$L$SEH_prolog_gcm_gmult_ssse3_2:
+$L$SEH_prologue_gcm_gmult_ssse3_2:
 	movdqa	XMMWORD[rsp],xmm6
-$L$SEH_prolog_gcm_gmult_ssse3_3:
+$L$SEH_prologue_gcm_gmult_ssse3_3:
 	movdqa	XMMWORD[16+rsp],xmm10
-$L$SEH_prolog_gcm_gmult_ssse3_4:
+$L$SEH_prologue_gcm_gmult_ssse3_4:
+$L$SEH_endprologue_gcm_gmult_ssse3_5:
 	movdqu	xmm0,XMMWORD[rcx]
 	movdqa	xmm10,XMMWORD[$L$reverse_bytes]
 	movdqa	xmm2,XMMWORD[$L$low4_mask]
@@ -210,7 +211,7 @@
 	add	rsp,40
 	ret
 
-$L$SEH_end_gcm_gmult_ssse3_5:
+$L$SEH_end_gcm_gmult_ssse3_6:
 
 
 
@@ -225,13 +226,14 @@
 $L$SEH_begin_gcm_ghash_ssse3_1:
 _CET_ENDBR
 	sub	rsp,56
-$L$SEH_prolog_gcm_ghash_ssse3_2:
+$L$SEH_prologue_gcm_ghash_ssse3_2:
 	movdqa	XMMWORD[rsp],xmm6
-$L$SEH_prolog_gcm_ghash_ssse3_3:
+$L$SEH_prologue_gcm_ghash_ssse3_3:
 	movdqa	XMMWORD[16+rsp],xmm10
-$L$SEH_prolog_gcm_ghash_ssse3_4:
+$L$SEH_prologue_gcm_ghash_ssse3_4:
 	movdqa	XMMWORD[32+rsp],xmm11
-$L$SEH_prolog_gcm_ghash_ssse3_5:
+$L$SEH_prologue_gcm_ghash_ssse3_5:
+$L$SEH_endprologue_gcm_ghash_ssse3_6:
 	movdqu	xmm0,XMMWORD[rcx]
 	movdqa	xmm10,XMMWORD[$L$reverse_bytes]
 	movdqa	xmm11,XMMWORD[$L$low4_mask]
@@ -434,7 +436,7 @@
 	add	rsp,56
 	ret
 
-$L$SEH_end_gcm_ghash_ssse3_6:
+$L$SEH_end_gcm_ghash_ssse3_7:
 
 
 section	.rdata rdata align=8
@@ -451,11 +453,11 @@
 section	.pdata rdata align=4
 ALIGN	4
 	DD	$L$SEH_begin_gcm_gmult_ssse3_1 wrt ..imagebase
-	DD	$L$SEH_end_gcm_gmult_ssse3_5 wrt ..imagebase
+	DD	$L$SEH_end_gcm_gmult_ssse3_6 wrt ..imagebase
 	DD	$L$SEH_info_gcm_gmult_ssse3_0 wrt ..imagebase
 
 	DD	$L$SEH_begin_gcm_ghash_ssse3_1 wrt ..imagebase
-	DD	$L$SEH_end_gcm_ghash_ssse3_6 wrt ..imagebase
+	DD	$L$SEH_end_gcm_ghash_ssse3_7 wrt ..imagebase
 	DD	$L$SEH_info_gcm_ghash_ssse3_0 wrt ..imagebase
 
 
@@ -463,33 +465,33 @@
 ALIGN	4
 $L$SEH_info_gcm_gmult_ssse3_0:
 	DB	1
-	DB	$L$SEH_prolog_gcm_gmult_ssse3_4-$L$SEH_begin_gcm_gmult_ssse3_1
+	DB	$L$SEH_endprologue_gcm_gmult_ssse3_5-$L$SEH_begin_gcm_gmult_ssse3_1
 	DB	5
 	DB	0
-	DB	$L$SEH_prolog_gcm_gmult_ssse3_4-$L$SEH_begin_gcm_gmult_ssse3_1
+	DB	$L$SEH_prologue_gcm_gmult_ssse3_4-$L$SEH_begin_gcm_gmult_ssse3_1
 	DB	168
 	DW	1
-	DB	$L$SEH_prolog_gcm_gmult_ssse3_3-$L$SEH_begin_gcm_gmult_ssse3_1
+	DB	$L$SEH_prologue_gcm_gmult_ssse3_3-$L$SEH_begin_gcm_gmult_ssse3_1
 	DB	104
 	DW	0
-	DB	$L$SEH_prolog_gcm_gmult_ssse3_2-$L$SEH_begin_gcm_gmult_ssse3_1
+	DB	$L$SEH_prologue_gcm_gmult_ssse3_2-$L$SEH_begin_gcm_gmult_ssse3_1
 	DB	66
 
 $L$SEH_info_gcm_ghash_ssse3_0:
 	DB	1
-	DB	$L$SEH_prolog_gcm_ghash_ssse3_5-$L$SEH_begin_gcm_ghash_ssse3_1
+	DB	$L$SEH_endprologue_gcm_ghash_ssse3_6-$L$SEH_begin_gcm_ghash_ssse3_1
 	DB	7
 	DB	0
-	DB	$L$SEH_prolog_gcm_ghash_ssse3_5-$L$SEH_begin_gcm_ghash_ssse3_1
+	DB	$L$SEH_prologue_gcm_ghash_ssse3_5-$L$SEH_begin_gcm_ghash_ssse3_1
 	DB	184
 	DW	2
-	DB	$L$SEH_prolog_gcm_ghash_ssse3_4-$L$SEH_begin_gcm_ghash_ssse3_1
+	DB	$L$SEH_prologue_gcm_ghash_ssse3_4-$L$SEH_begin_gcm_ghash_ssse3_1
 	DB	168
 	DW	1
-	DB	$L$SEH_prolog_gcm_ghash_ssse3_3-$L$SEH_begin_gcm_ghash_ssse3_1
+	DB	$L$SEH_prologue_gcm_ghash_ssse3_3-$L$SEH_begin_gcm_ghash_ssse3_1
 	DB	104
 	DW	0
-	DB	$L$SEH_prolog_gcm_ghash_ssse3_2-$L$SEH_begin_gcm_ghash_ssse3_1
+	DB	$L$SEH_prologue_gcm_ghash_ssse3_2-$L$SEH_begin_gcm_ghash_ssse3_1
 	DB	98
 %else
 ; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738
diff --git a/gen/bcm/ghash-x86_64-apple.S b/gen/bcm/ghash-x86_64-apple.S
index 909d659..4961298 100644
--- a/gen/bcm/ghash-x86_64-apple.S
+++ b/gen/bcm/ghash-x86_64-apple.S
@@ -612,6 +612,7 @@
 .p2align	5
 _gcm_init_avx:
 
+
 _CET_ENDBR
 	vzeroupper
 
@@ -734,6 +735,7 @@
 .p2align	5
 _gcm_ghash_avx:
 
+
 _CET_ENDBR
 	vzeroupper
 
diff --git a/gen/bcm/ghash-x86_64-linux.S b/gen/bcm/ghash-x86_64-linux.S
index 22429a6..e00bb9f 100644
--- a/gen/bcm/ghash-x86_64-linux.S
+++ b/gen/bcm/ghash-x86_64-linux.S
@@ -612,6 +612,7 @@
 .align	32
 gcm_init_avx:
 .cfi_startproc	
+
 _CET_ENDBR
 	vzeroupper
 
@@ -734,6 +735,7 @@
 .align	32
 gcm_ghash_avx:
 .cfi_startproc	
+
 _CET_ENDBR
 	vzeroupper
 
diff --git a/gen/bcm/ghash-x86_64-win.asm b/gen/bcm/ghash-x86_64-win.asm
index 41b189a..bd4d691 100644
--- a/gen/bcm/ghash-x86_64-win.asm
+++ b/gen/bcm/ghash-x86_64-win.asm
@@ -22,9 +22,10 @@
 _CET_ENDBR
 $L$_init_clmul:
 	sub	rsp,0x18
-$L$SEH_prolog_gcm_init_clmul_2:
+$L$SEH_prologue_gcm_init_clmul_2:
 	movaps	XMMWORD[rsp],xmm6
-$L$SEH_prolog_gcm_init_clmul_3:
+$L$SEH_prologue_gcm_init_clmul_3:
+$L$SEH_endprologue_gcm_init_clmul_4:
 	movdqu	xmm2,XMMWORD[rdx]
 	pshufd	xmm2,xmm2,78
 
@@ -178,7 +179,7 @@
 	lea	rsp,[24+rsp]
 	ret
 
-$L$SEH_end_gcm_init_clmul_4:
+$L$SEH_end_gcm_init_clmul_5:
 
 global	gcm_gmult_clmul
 
@@ -244,27 +245,28 @@
 $L$_ghash_clmul:
 	lea	rax,[((-136))+rsp]
 	lea	rsp,[((-32))+rax]
-$L$SEH_prolog_gcm_ghash_clmul_2:
+$L$SEH_prologue_gcm_ghash_clmul_2:
 	movaps	XMMWORD[(-32)+rax],xmm6
-$L$SEH_prolog_gcm_ghash_clmul_3:
+$L$SEH_prologue_gcm_ghash_clmul_3:
 	movaps	XMMWORD[(-16)+rax],xmm7
-$L$SEH_prolog_gcm_ghash_clmul_4:
+$L$SEH_prologue_gcm_ghash_clmul_4:
 	movaps	XMMWORD[rax],xmm8
-$L$SEH_prolog_gcm_ghash_clmul_5:
+$L$SEH_prologue_gcm_ghash_clmul_5:
 	movaps	XMMWORD[16+rax],xmm9
-$L$SEH_prolog_gcm_ghash_clmul_6:
+$L$SEH_prologue_gcm_ghash_clmul_6:
 	movaps	XMMWORD[32+rax],xmm10
-$L$SEH_prolog_gcm_ghash_clmul_7:
+$L$SEH_prologue_gcm_ghash_clmul_7:
 	movaps	XMMWORD[48+rax],xmm11
-$L$SEH_prolog_gcm_ghash_clmul_8:
+$L$SEH_prologue_gcm_ghash_clmul_8:
 	movaps	XMMWORD[64+rax],xmm12
-$L$SEH_prolog_gcm_ghash_clmul_9:
+$L$SEH_prologue_gcm_ghash_clmul_9:
 	movaps	XMMWORD[80+rax],xmm13
-$L$SEH_prolog_gcm_ghash_clmul_10:
+$L$SEH_prologue_gcm_ghash_clmul_10:
 	movaps	XMMWORD[96+rax],xmm14
-$L$SEH_prolog_gcm_ghash_clmul_11:
+$L$SEH_prologue_gcm_ghash_clmul_11:
 	movaps	XMMWORD[112+rax],xmm15
-$L$SEH_prolog_gcm_ghash_clmul_12:
+$L$SEH_prologue_gcm_ghash_clmul_12:
+$L$SEH_endprologue_gcm_ghash_clmul_13:
 	movdqa	xmm10,XMMWORD[$L$bswap_mask]
 
 	movdqu	xmm0,XMMWORD[rcx]
@@ -649,19 +651,20 @@
 	lea	rsp,[168+rsp]
 	ret
 
-$L$SEH_end_gcm_ghash_clmul_13:
+$L$SEH_end_gcm_ghash_clmul_14:
 
 global	gcm_init_avx
 
 ALIGN	32
 gcm_init_avx:
 
-_CET_ENDBR
 $L$SEH_begin_gcm_init_avx_1:
+_CET_ENDBR
 	sub	rsp,0x18
-$L$SEH_prolog_gcm_init_avx_2:
+$L$SEH_prologue_gcm_init_avx_2:
 	movaps	XMMWORD[rsp],xmm6
-$L$SEH_prolog_gcm_init_avx_3:
+$L$SEH_prologue_gcm_init_avx_3:
+$L$SEH_endprologue_gcm_init_avx_4:
 	vzeroupper
 
 	vmovdqu	xmm2,XMMWORD[rdx]
@@ -766,7 +769,7 @@
 	movaps	xmm6,XMMWORD[rsp]
 	lea	rsp,[24+rsp]
 	ret
-$L$SEH_end_gcm_init_avx_4:
+$L$SEH_end_gcm_init_avx_5:
 
 
 global	gcm_gmult_avx
@@ -783,31 +786,32 @@
 ALIGN	32
 gcm_ghash_avx:
 
-_CET_ENDBR
 $L$SEH_begin_gcm_ghash_avx_1:
+_CET_ENDBR
 	lea	rax,[((-136))+rsp]
 	lea	rsp,[((-32))+rax]
-$L$SEH_prolog_gcm_ghash_avx_2:
+$L$SEH_prologue_gcm_ghash_avx_2:
 	movaps	XMMWORD[(-32)+rax],xmm6
-$L$SEH_prolog_gcm_ghash_avx_3:
+$L$SEH_prologue_gcm_ghash_avx_3:
 	movaps	XMMWORD[(-16)+rax],xmm7
-$L$SEH_prolog_gcm_ghash_avx_4:
+$L$SEH_prologue_gcm_ghash_avx_4:
 	movaps	XMMWORD[rax],xmm8
-$L$SEH_prolog_gcm_ghash_avx_5:
+$L$SEH_prologue_gcm_ghash_avx_5:
 	movaps	XMMWORD[16+rax],xmm9
-$L$SEH_prolog_gcm_ghash_avx_6:
+$L$SEH_prologue_gcm_ghash_avx_6:
 	movaps	XMMWORD[32+rax],xmm10
-$L$SEH_prolog_gcm_ghash_avx_7:
+$L$SEH_prologue_gcm_ghash_avx_7:
 	movaps	XMMWORD[48+rax],xmm11
-$L$SEH_prolog_gcm_ghash_avx_8:
+$L$SEH_prologue_gcm_ghash_avx_8:
 	movaps	XMMWORD[64+rax],xmm12
-$L$SEH_prolog_gcm_ghash_avx_9:
+$L$SEH_prologue_gcm_ghash_avx_9:
 	movaps	XMMWORD[80+rax],xmm13
-$L$SEH_prolog_gcm_ghash_avx_10:
+$L$SEH_prologue_gcm_ghash_avx_10:
 	movaps	XMMWORD[96+rax],xmm14
-$L$SEH_prolog_gcm_ghash_avx_11:
+$L$SEH_prologue_gcm_ghash_avx_11:
 	movaps	XMMWORD[112+rax],xmm15
-$L$SEH_prolog_gcm_ghash_avx_12:
+$L$SEH_prologue_gcm_ghash_avx_12:
+$L$SEH_endprologue_gcm_ghash_avx_13:
 	vzeroupper
 
 	vmovdqu	xmm10,XMMWORD[rcx]
@@ -1191,7 +1195,7 @@
 	lea	rsp,[168+rsp]
 	ret
 
-$L$SEH_end_gcm_ghash_avx_13:
+$L$SEH_end_gcm_ghash_avx_14:
 
 section	.rdata rdata align=8
 ALIGN	64
@@ -1213,19 +1217,19 @@
 section	.pdata rdata align=4
 ALIGN	4
 	DD	$L$SEH_begin_gcm_init_clmul_1 wrt ..imagebase
-	DD	$L$SEH_end_gcm_init_clmul_4 wrt ..imagebase
+	DD	$L$SEH_end_gcm_init_clmul_5 wrt ..imagebase
 	DD	$L$SEH_info_gcm_init_clmul_0 wrt ..imagebase
 
 	DD	$L$SEH_begin_gcm_ghash_clmul_1 wrt ..imagebase
-	DD	$L$SEH_end_gcm_ghash_clmul_13 wrt ..imagebase
+	DD	$L$SEH_end_gcm_ghash_clmul_14 wrt ..imagebase
 	DD	$L$SEH_info_gcm_ghash_clmul_0 wrt ..imagebase
 
 	DD	$L$SEH_begin_gcm_init_avx_1 wrt ..imagebase
-	DD	$L$SEH_end_gcm_init_avx_4 wrt ..imagebase
+	DD	$L$SEH_end_gcm_init_avx_5 wrt ..imagebase
 	DD	$L$SEH_info_gcm_init_avx_0 wrt ..imagebase
 
 	DD	$L$SEH_begin_gcm_ghash_avx_1 wrt ..imagebase
-	DD	$L$SEH_end_gcm_ghash_avx_13 wrt ..imagebase
+	DD	$L$SEH_end_gcm_ghash_avx_14 wrt ..imagebase
 	DD	$L$SEH_info_gcm_ghash_avx_0 wrt ..imagebase
 
 
@@ -1233,101 +1237,101 @@
 ALIGN	4
 $L$SEH_info_gcm_init_clmul_0:
 	DB	1
-	DB	$L$SEH_prolog_gcm_init_clmul_3-$L$SEH_begin_gcm_init_clmul_1
+	DB	$L$SEH_endprologue_gcm_init_clmul_4-$L$SEH_begin_gcm_init_clmul_1
 	DB	3
 	DB	0
-	DB	$L$SEH_prolog_gcm_init_clmul_3-$L$SEH_begin_gcm_init_clmul_1
+	DB	$L$SEH_prologue_gcm_init_clmul_3-$L$SEH_begin_gcm_init_clmul_1
 	DB	104
 	DW	0
-	DB	$L$SEH_prolog_gcm_init_clmul_2-$L$SEH_begin_gcm_init_clmul_1
+	DB	$L$SEH_prologue_gcm_init_clmul_2-$L$SEH_begin_gcm_init_clmul_1
 	DB	34
 
 $L$SEH_info_gcm_ghash_clmul_0:
 	DB	1
-	DB	$L$SEH_prolog_gcm_ghash_clmul_12-$L$SEH_begin_gcm_ghash_clmul_1
+	DB	$L$SEH_endprologue_gcm_ghash_clmul_13-$L$SEH_begin_gcm_ghash_clmul_1
 	DB	22
 	DB	0
-	DB	$L$SEH_prolog_gcm_ghash_clmul_12-$L$SEH_begin_gcm_ghash_clmul_1
+	DB	$L$SEH_prologue_gcm_ghash_clmul_12-$L$SEH_begin_gcm_ghash_clmul_1
 	DB	248
 	DW	9
-	DB	$L$SEH_prolog_gcm_ghash_clmul_11-$L$SEH_begin_gcm_ghash_clmul_1
+	DB	$L$SEH_prologue_gcm_ghash_clmul_11-$L$SEH_begin_gcm_ghash_clmul_1
 	DB	232
 	DW	8
-	DB	$L$SEH_prolog_gcm_ghash_clmul_10-$L$SEH_begin_gcm_ghash_clmul_1
+	DB	$L$SEH_prologue_gcm_ghash_clmul_10-$L$SEH_begin_gcm_ghash_clmul_1
 	DB	216
 	DW	7
-	DB	$L$SEH_prolog_gcm_ghash_clmul_9-$L$SEH_begin_gcm_ghash_clmul_1
+	DB	$L$SEH_prologue_gcm_ghash_clmul_9-$L$SEH_begin_gcm_ghash_clmul_1
 	DB	200
 	DW	6
-	DB	$L$SEH_prolog_gcm_ghash_clmul_8-$L$SEH_begin_gcm_ghash_clmul_1
+	DB	$L$SEH_prologue_gcm_ghash_clmul_8-$L$SEH_begin_gcm_ghash_clmul_1
 	DB	184
 	DW	5
-	DB	$L$SEH_prolog_gcm_ghash_clmul_7-$L$SEH_begin_gcm_ghash_clmul_1
+	DB	$L$SEH_prologue_gcm_ghash_clmul_7-$L$SEH_begin_gcm_ghash_clmul_1
 	DB	168
 	DW	4
-	DB	$L$SEH_prolog_gcm_ghash_clmul_6-$L$SEH_begin_gcm_ghash_clmul_1
+	DB	$L$SEH_prologue_gcm_ghash_clmul_6-$L$SEH_begin_gcm_ghash_clmul_1
 	DB	152
 	DW	3
-	DB	$L$SEH_prolog_gcm_ghash_clmul_5-$L$SEH_begin_gcm_ghash_clmul_1
+	DB	$L$SEH_prologue_gcm_ghash_clmul_5-$L$SEH_begin_gcm_ghash_clmul_1
 	DB	136
 	DW	2
-	DB	$L$SEH_prolog_gcm_ghash_clmul_4-$L$SEH_begin_gcm_ghash_clmul_1
+	DB	$L$SEH_prologue_gcm_ghash_clmul_4-$L$SEH_begin_gcm_ghash_clmul_1
 	DB	120
 	DW	1
-	DB	$L$SEH_prolog_gcm_ghash_clmul_3-$L$SEH_begin_gcm_ghash_clmul_1
+	DB	$L$SEH_prologue_gcm_ghash_clmul_3-$L$SEH_begin_gcm_ghash_clmul_1
 	DB	104
 	DW	0
-	DB	$L$SEH_prolog_gcm_ghash_clmul_2-$L$SEH_begin_gcm_ghash_clmul_1
+	DB	$L$SEH_prologue_gcm_ghash_clmul_2-$L$SEH_begin_gcm_ghash_clmul_1
 	DB	1
 	DW	21
 
 $L$SEH_info_gcm_init_avx_0:
 	DB	1
-	DB	$L$SEH_prolog_gcm_init_avx_3-$L$SEH_begin_gcm_init_avx_1
+	DB	$L$SEH_endprologue_gcm_init_avx_4-$L$SEH_begin_gcm_init_avx_1
 	DB	3
 	DB	0
-	DB	$L$SEH_prolog_gcm_init_avx_3-$L$SEH_begin_gcm_init_avx_1
+	DB	$L$SEH_prologue_gcm_init_avx_3-$L$SEH_begin_gcm_init_avx_1
 	DB	104
 	DW	0
-	DB	$L$SEH_prolog_gcm_init_avx_2-$L$SEH_begin_gcm_init_avx_1
+	DB	$L$SEH_prologue_gcm_init_avx_2-$L$SEH_begin_gcm_init_avx_1
 	DB	34
 
 $L$SEH_info_gcm_ghash_avx_0:
 	DB	1
-	DB	$L$SEH_prolog_gcm_ghash_avx_12-$L$SEH_begin_gcm_ghash_avx_1
+	DB	$L$SEH_endprologue_gcm_ghash_avx_13-$L$SEH_begin_gcm_ghash_avx_1
 	DB	22
 	DB	0
-	DB	$L$SEH_prolog_gcm_ghash_avx_12-$L$SEH_begin_gcm_ghash_avx_1
+	DB	$L$SEH_prologue_gcm_ghash_avx_12-$L$SEH_begin_gcm_ghash_avx_1
 	DB	248
 	DW	9
-	DB	$L$SEH_prolog_gcm_ghash_avx_11-$L$SEH_begin_gcm_ghash_avx_1
+	DB	$L$SEH_prologue_gcm_ghash_avx_11-$L$SEH_begin_gcm_ghash_avx_1
 	DB	232
 	DW	8
-	DB	$L$SEH_prolog_gcm_ghash_avx_10-$L$SEH_begin_gcm_ghash_avx_1
+	DB	$L$SEH_prologue_gcm_ghash_avx_10-$L$SEH_begin_gcm_ghash_avx_1
 	DB	216
 	DW	7
-	DB	$L$SEH_prolog_gcm_ghash_avx_9-$L$SEH_begin_gcm_ghash_avx_1
+	DB	$L$SEH_prologue_gcm_ghash_avx_9-$L$SEH_begin_gcm_ghash_avx_1
 	DB	200
 	DW	6
-	DB	$L$SEH_prolog_gcm_ghash_avx_8-$L$SEH_begin_gcm_ghash_avx_1
+	DB	$L$SEH_prologue_gcm_ghash_avx_8-$L$SEH_begin_gcm_ghash_avx_1
 	DB	184
 	DW	5
-	DB	$L$SEH_prolog_gcm_ghash_avx_7-$L$SEH_begin_gcm_ghash_avx_1
+	DB	$L$SEH_prologue_gcm_ghash_avx_7-$L$SEH_begin_gcm_ghash_avx_1
 	DB	168
 	DW	4
-	DB	$L$SEH_prolog_gcm_ghash_avx_6-$L$SEH_begin_gcm_ghash_avx_1
+	DB	$L$SEH_prologue_gcm_ghash_avx_6-$L$SEH_begin_gcm_ghash_avx_1
 	DB	152
 	DW	3
-	DB	$L$SEH_prolog_gcm_ghash_avx_5-$L$SEH_begin_gcm_ghash_avx_1
+	DB	$L$SEH_prologue_gcm_ghash_avx_5-$L$SEH_begin_gcm_ghash_avx_1
 	DB	136
 	DW	2
-	DB	$L$SEH_prolog_gcm_ghash_avx_4-$L$SEH_begin_gcm_ghash_avx_1
+	DB	$L$SEH_prologue_gcm_ghash_avx_4-$L$SEH_begin_gcm_ghash_avx_1
 	DB	120
 	DW	1
-	DB	$L$SEH_prolog_gcm_ghash_avx_3-$L$SEH_begin_gcm_ghash_avx_1
+	DB	$L$SEH_prologue_gcm_ghash_avx_3-$L$SEH_begin_gcm_ghash_avx_1
 	DB	104
 	DW	0
-	DB	$L$SEH_prolog_gcm_ghash_avx_2-$L$SEH_begin_gcm_ghash_avx_1
+	DB	$L$SEH_prologue_gcm_ghash_avx_2-$L$SEH_begin_gcm_ghash_avx_1
 	DB	1
 	DW	21
 %else
diff --git a/gen/test_support/trampoline-x86_64-apple.S b/gen/test_support/trampoline-x86_64-apple.S
index 7c76d2d..f618dac 100644
--- a/gen/test_support/trampoline-x86_64-apple.S
+++ b/gen/test_support/trampoline-x86_64-apple.S
@@ -52,6 +52,7 @@
 	movq	%r15,104(%rsp)
 
 
+
 	movq	0(%rsi),%rbx
 	movq	8(%rsi),%rbp
 	movq	16(%rsi),%r12
@@ -473,6 +474,7 @@
 
 
 
+
 	nop
 	popq	%r12
 
@@ -496,6 +498,7 @@
 
 
 
+
 	movq	%r12,%rax
 	incq	%rax
 	movq	%rax,(%rsp)
diff --git a/gen/test_support/trampoline-x86_64-linux.S b/gen/test_support/trampoline-x86_64-linux.S
index 93af8b9..8557e9e 100644
--- a/gen/test_support/trampoline-x86_64-linux.S
+++ b/gen/test_support/trampoline-x86_64-linux.S
@@ -52,6 +52,7 @@
 	movq	%r15,104(%rsp)
 .cfi_offset	r15, -24
 
+
 	movq	0(%rsi),%rbx
 	movq	8(%rsi),%rbp
 	movq	16(%rsi),%r12
@@ -474,6 +475,7 @@
 
 
 
+
 	nop
 	popq	%r12
 .cfi_adjust_cfa_offset	-8
@@ -499,6 +501,7 @@
 .cfi_offset	%r12,-16
 
 
+
 	movq	%r12,%rax
 	incq	%rax
 	movq	%rax,(%rsp)
diff --git a/gen/test_support/trampoline-x86_64-win.asm b/gen/test_support/trampoline-x86_64-win.asm
index ae04cbe..dca3957 100644
--- a/gen/test_support/trampoline-x86_64-win.asm
+++ b/gen/test_support/trampoline-x86_64-win.asm
@@ -39,61 +39,62 @@
 
 	sub	rsp,344
 
-$L$SEH_prolog_abi_test_trampoline_2:
+$L$SEH_prologue_abi_test_trampoline_2:
 	mov	QWORD[112+rsp],rbx
 
-$L$SEH_prolog_abi_test_trampoline_3:
+$L$SEH_prologue_abi_test_trampoline_3:
 	mov	QWORD[120+rsp],rbp
 
-$L$SEH_prolog_abi_test_trampoline_4:
+$L$SEH_prologue_abi_test_trampoline_4:
 	mov	QWORD[128+rsp],rdi
 
-$L$SEH_prolog_abi_test_trampoline_5:
+$L$SEH_prologue_abi_test_trampoline_5:
 	mov	QWORD[136+rsp],rsi
 
-$L$SEH_prolog_abi_test_trampoline_6:
+$L$SEH_prologue_abi_test_trampoline_6:
 	mov	QWORD[144+rsp],r12
 
-$L$SEH_prolog_abi_test_trampoline_7:
+$L$SEH_prologue_abi_test_trampoline_7:
 	mov	QWORD[152+rsp],r13
 
-$L$SEH_prolog_abi_test_trampoline_8:
+$L$SEH_prologue_abi_test_trampoline_8:
 	mov	QWORD[160+rsp],r14
 
-$L$SEH_prolog_abi_test_trampoline_9:
+$L$SEH_prologue_abi_test_trampoline_9:
 	mov	QWORD[168+rsp],r15
 
-$L$SEH_prolog_abi_test_trampoline_10:
+$L$SEH_prologue_abi_test_trampoline_10:
 	movdqa	XMMWORD[176+rsp],xmm6
 
-$L$SEH_prolog_abi_test_trampoline_11:
+$L$SEH_prologue_abi_test_trampoline_11:
 	movdqa	XMMWORD[192+rsp],xmm7
 
-$L$SEH_prolog_abi_test_trampoline_12:
+$L$SEH_prologue_abi_test_trampoline_12:
 	movdqa	XMMWORD[208+rsp],xmm8
 
-$L$SEH_prolog_abi_test_trampoline_13:
+$L$SEH_prologue_abi_test_trampoline_13:
 	movdqa	XMMWORD[224+rsp],xmm9
 
-$L$SEH_prolog_abi_test_trampoline_14:
+$L$SEH_prologue_abi_test_trampoline_14:
 	movdqa	XMMWORD[240+rsp],xmm10
 
-$L$SEH_prolog_abi_test_trampoline_15:
+$L$SEH_prologue_abi_test_trampoline_15:
 	movdqa	XMMWORD[256+rsp],xmm11
 
-$L$SEH_prolog_abi_test_trampoline_16:
+$L$SEH_prologue_abi_test_trampoline_16:
 	movdqa	XMMWORD[272+rsp],xmm12
 
-$L$SEH_prolog_abi_test_trampoline_17:
+$L$SEH_prologue_abi_test_trampoline_17:
 	movdqa	XMMWORD[288+rsp],xmm13
 
-$L$SEH_prolog_abi_test_trampoline_18:
+$L$SEH_prologue_abi_test_trampoline_18:
 	movdqa	XMMWORD[304+rsp],xmm14
 
-$L$SEH_prolog_abi_test_trampoline_19:
+$L$SEH_prologue_abi_test_trampoline_19:
 	movdqa	XMMWORD[320+rsp],xmm15
 
-$L$SEH_prolog_abi_test_trampoline_20:
+$L$SEH_prologue_abi_test_trampoline_20:
+$L$SEH_endprologue_abi_test_trampoline_21:
 	mov	rbx,QWORD[rdx]
 	mov	rbp,QWORD[8+rdx]
 	mov	rdi,QWORD[16+rdx]
@@ -254,7 +255,7 @@
 
 	ret
 
-$L$SEH_end_abi_test_trampoline_21:
+$L$SEH_end_abi_test_trampoline_22:
 
 
 global	abi_test_clobber_rax
@@ -516,7 +517,8 @@
 _CET_ENDBR
 	push	r12
 
-$L$SEH_prolog_abi_test_bad_unwind_wrong_register_2:
+$L$SEH_prologue_abi_test_bad_unwind_wrong_register_2:
+$L$SEH_endprologue_abi_test_bad_unwind_wrong_register_3:
 
 
 
@@ -524,7 +526,7 @@
 	pop	r12
 
 	ret
-$L$SEH_end_abi_test_bad_unwind_wrong_register_3:
+$L$SEH_end_abi_test_bad_unwind_wrong_register_4:
 
 
 
@@ -540,7 +542,8 @@
 _CET_ENDBR
 	push	r12
 
-$L$SEH_prolog_abi_test_bad_unwind_temporary_2:
+$L$SEH_prologue_abi_test_bad_unwind_temporary_2:
+$L$SEH_endprologue_abi_test_bad_unwind_temporary_3:
 
 	mov	rax,r12
 	inc	rax
@@ -555,7 +558,7 @@
 
 	ret
 
-$L$SEH_end_abi_test_bad_unwind_temporary_3:
+$L$SEH_end_abi_test_bad_unwind_temporary_4:
 
 
 
@@ -592,7 +595,8 @@
 abi_test_bad_unwind_epilog:
 $L$SEH_begin_abi_test_bad_unwind_epilog_1:
 	push	r12
-$L$SEH_prolog_abi_test_bad_unwind_epilog_2:
+$L$SEH_prologue_abi_test_bad_unwind_epilog_2:
+$L$SEH_endprologue_abi_test_bad_unwind_epilog_3:
 
 	nop
 
@@ -600,24 +604,24 @@
 	pop	r12
 	nop
 	ret
-$L$SEH_end_abi_test_bad_unwind_epilog_3:
+$L$SEH_end_abi_test_bad_unwind_epilog_4:
 
 section	.pdata rdata align=4
 ALIGN	4
 	DD	$L$SEH_begin_abi_test_trampoline_1 wrt ..imagebase
-	DD	$L$SEH_end_abi_test_trampoline_21 wrt ..imagebase
+	DD	$L$SEH_end_abi_test_trampoline_22 wrt ..imagebase
 	DD	$L$SEH_info_abi_test_trampoline_0 wrt ..imagebase
 
 	DD	$L$SEH_begin_abi_test_bad_unwind_wrong_register_1 wrt ..imagebase
-	DD	$L$SEH_end_abi_test_bad_unwind_wrong_register_3 wrt ..imagebase
+	DD	$L$SEH_end_abi_test_bad_unwind_wrong_register_4 wrt ..imagebase
 	DD	$L$SEH_info_abi_test_bad_unwind_wrong_register_0 wrt ..imagebase
 
 	DD	$L$SEH_begin_abi_test_bad_unwind_temporary_1 wrt ..imagebase
-	DD	$L$SEH_end_abi_test_bad_unwind_temporary_3 wrt ..imagebase
+	DD	$L$SEH_end_abi_test_bad_unwind_temporary_4 wrt ..imagebase
 	DD	$L$SEH_info_abi_test_bad_unwind_temporary_0 wrt ..imagebase
 
 	DD	$L$SEH_begin_abi_test_bad_unwind_epilog_1 wrt ..imagebase
-	DD	$L$SEH_end_abi_test_bad_unwind_epilog_3 wrt ..imagebase
+	DD	$L$SEH_end_abi_test_bad_unwind_epilog_4 wrt ..imagebase
 	DD	$L$SEH_info_abi_test_bad_unwind_epilog_0 wrt ..imagebase
 
 
@@ -625,89 +629,89 @@
 ALIGN	4
 $L$SEH_info_abi_test_trampoline_0:
 	DB	1
-	DB	$L$SEH_prolog_abi_test_trampoline_20-$L$SEH_begin_abi_test_trampoline_1
+	DB	$L$SEH_endprologue_abi_test_trampoline_21-$L$SEH_begin_abi_test_trampoline_1
 	DB	38
 	DB	0
-	DB	$L$SEH_prolog_abi_test_trampoline_20-$L$SEH_begin_abi_test_trampoline_1
+	DB	$L$SEH_prologue_abi_test_trampoline_20-$L$SEH_begin_abi_test_trampoline_1
 	DB	248
 	DW	20
-	DB	$L$SEH_prolog_abi_test_trampoline_19-$L$SEH_begin_abi_test_trampoline_1
+	DB	$L$SEH_prologue_abi_test_trampoline_19-$L$SEH_begin_abi_test_trampoline_1
 	DB	232
 	DW	19
-	DB	$L$SEH_prolog_abi_test_trampoline_18-$L$SEH_begin_abi_test_trampoline_1
+	DB	$L$SEH_prologue_abi_test_trampoline_18-$L$SEH_begin_abi_test_trampoline_1
 	DB	216
 	DW	18
-	DB	$L$SEH_prolog_abi_test_trampoline_17-$L$SEH_begin_abi_test_trampoline_1
+	DB	$L$SEH_prologue_abi_test_trampoline_17-$L$SEH_begin_abi_test_trampoline_1
 	DB	200
 	DW	17
-	DB	$L$SEH_prolog_abi_test_trampoline_16-$L$SEH_begin_abi_test_trampoline_1
+	DB	$L$SEH_prologue_abi_test_trampoline_16-$L$SEH_begin_abi_test_trampoline_1
 	DB	184
 	DW	16
-	DB	$L$SEH_prolog_abi_test_trampoline_15-$L$SEH_begin_abi_test_trampoline_1
+	DB	$L$SEH_prologue_abi_test_trampoline_15-$L$SEH_begin_abi_test_trampoline_1
 	DB	168
 	DW	15
-	DB	$L$SEH_prolog_abi_test_trampoline_14-$L$SEH_begin_abi_test_trampoline_1
+	DB	$L$SEH_prologue_abi_test_trampoline_14-$L$SEH_begin_abi_test_trampoline_1
 	DB	152
 	DW	14
-	DB	$L$SEH_prolog_abi_test_trampoline_13-$L$SEH_begin_abi_test_trampoline_1
+	DB	$L$SEH_prologue_abi_test_trampoline_13-$L$SEH_begin_abi_test_trampoline_1
 	DB	136
 	DW	13
-	DB	$L$SEH_prolog_abi_test_trampoline_12-$L$SEH_begin_abi_test_trampoline_1
+	DB	$L$SEH_prologue_abi_test_trampoline_12-$L$SEH_begin_abi_test_trampoline_1
 	DB	120
 	DW	12
-	DB	$L$SEH_prolog_abi_test_trampoline_11-$L$SEH_begin_abi_test_trampoline_1
+	DB	$L$SEH_prologue_abi_test_trampoline_11-$L$SEH_begin_abi_test_trampoline_1
 	DB	104
 	DW	11
-	DB	$L$SEH_prolog_abi_test_trampoline_10-$L$SEH_begin_abi_test_trampoline_1
+	DB	$L$SEH_prologue_abi_test_trampoline_10-$L$SEH_begin_abi_test_trampoline_1
 	DB	244
 	DW	21
-	DB	$L$SEH_prolog_abi_test_trampoline_9-$L$SEH_begin_abi_test_trampoline_1
+	DB	$L$SEH_prologue_abi_test_trampoline_9-$L$SEH_begin_abi_test_trampoline_1
 	DB	228
 	DW	20
-	DB	$L$SEH_prolog_abi_test_trampoline_8-$L$SEH_begin_abi_test_trampoline_1
+	DB	$L$SEH_prologue_abi_test_trampoline_8-$L$SEH_begin_abi_test_trampoline_1
 	DB	212
 	DW	19
-	DB	$L$SEH_prolog_abi_test_trampoline_7-$L$SEH_begin_abi_test_trampoline_1
+	DB	$L$SEH_prologue_abi_test_trampoline_7-$L$SEH_begin_abi_test_trampoline_1
 	DB	196
 	DW	18
-	DB	$L$SEH_prolog_abi_test_trampoline_6-$L$SEH_begin_abi_test_trampoline_1
+	DB	$L$SEH_prologue_abi_test_trampoline_6-$L$SEH_begin_abi_test_trampoline_1
 	DB	100
 	DW	17
-	DB	$L$SEH_prolog_abi_test_trampoline_5-$L$SEH_begin_abi_test_trampoline_1
+	DB	$L$SEH_prologue_abi_test_trampoline_5-$L$SEH_begin_abi_test_trampoline_1
 	DB	116
 	DW	16
-	DB	$L$SEH_prolog_abi_test_trampoline_4-$L$SEH_begin_abi_test_trampoline_1
+	DB	$L$SEH_prologue_abi_test_trampoline_4-$L$SEH_begin_abi_test_trampoline_1
 	DB	84
 	DW	15
-	DB	$L$SEH_prolog_abi_test_trampoline_3-$L$SEH_begin_abi_test_trampoline_1
+	DB	$L$SEH_prologue_abi_test_trampoline_3-$L$SEH_begin_abi_test_trampoline_1
 	DB	52
 	DW	14
-	DB	$L$SEH_prolog_abi_test_trampoline_2-$L$SEH_begin_abi_test_trampoline_1
+	DB	$L$SEH_prologue_abi_test_trampoline_2-$L$SEH_begin_abi_test_trampoline_1
 	DB	1
 	DW	43
 
 $L$SEH_info_abi_test_bad_unwind_wrong_register_0:
 	DB	1
-	DB	$L$SEH_prolog_abi_test_bad_unwind_wrong_register_2-$L$SEH_begin_abi_test_bad_unwind_wrong_register_1
+	DB	$L$SEH_endprologue_abi_test_bad_unwind_wrong_register_3-$L$SEH_begin_abi_test_bad_unwind_wrong_register_1
 	DB	1
 	DB	0
-	DB	$L$SEH_prolog_abi_test_bad_unwind_wrong_register_2-$L$SEH_begin_abi_test_bad_unwind_wrong_register_1
+	DB	$L$SEH_prologue_abi_test_bad_unwind_wrong_register_2-$L$SEH_begin_abi_test_bad_unwind_wrong_register_1
 	DB	208
 
 $L$SEH_info_abi_test_bad_unwind_temporary_0:
 	DB	1
-	DB	$L$SEH_prolog_abi_test_bad_unwind_temporary_2-$L$SEH_begin_abi_test_bad_unwind_temporary_1
+	DB	$L$SEH_endprologue_abi_test_bad_unwind_temporary_3-$L$SEH_begin_abi_test_bad_unwind_temporary_1
 	DB	1
 	DB	0
-	DB	$L$SEH_prolog_abi_test_bad_unwind_temporary_2-$L$SEH_begin_abi_test_bad_unwind_temporary_1
+	DB	$L$SEH_prologue_abi_test_bad_unwind_temporary_2-$L$SEH_begin_abi_test_bad_unwind_temporary_1
 	DB	192
 
 $L$SEH_info_abi_test_bad_unwind_epilog_0:
 	DB	1
-	DB	$L$SEH_prolog_abi_test_bad_unwind_epilog_2-$L$SEH_begin_abi_test_bad_unwind_epilog_1
+	DB	$L$SEH_endprologue_abi_test_bad_unwind_epilog_3-$L$SEH_begin_abi_test_bad_unwind_epilog_1
 	DB	1
 	DB	0
-	DB	$L$SEH_prolog_abi_test_bad_unwind_epilog_2-$L$SEH_begin_abi_test_bad_unwind_epilog_1
+	DB	$L$SEH_prologue_abi_test_bad_unwind_epilog_2-$L$SEH_begin_abi_test_bad_unwind_epilog_1
 	DB	192
 %else
 ; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738