p256-x86_64-asm.pl: Win64 SEH face-lift.
This imports 384e6de4c7e35e37fb3d6fbeb32ddcb5eb0d3d3f and
79ca382d4762c58c4b92fceb4e202e90c71292ae from upstream.
Differences from upstream:
- We've removed a number of unused functions.
- We never imported 3ff08e1dde56747011a702a9a5aae06cfa8ae5fc, which was
to give the assembly control over the memory layout in the tables. So
our "gather" is "select" (which is implemented the same because the
memory layout never did change) and our "scatter" is in C.
Change-Id: I90d4a17da9f5f693f4dc4706887dec15f010071b
Reviewed-on: https://boringssl-review.googlesource.com/25586
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl b/crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl
index dd34d9d..0431379 100755
--- a/crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl
+++ b/crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl
@@ -102,6 +102,7 @@
ecp_nistz256_neg:
push %r12
push %r13
+.Lneg_body:
xor $a0, $a0
xor $a1, $a1
@@ -135,8 +136,10 @@
mov $a2, 8*2($r_ptr)
mov $a3, 8*3($r_ptr)
- pop %r13
- pop %r12
+ mov 0(%rsp),%r13
+ mov 8(%rsp),%r12
+ lea 16(%rsp),%rsp
+.Lneg_epilogue:
ret
.size ecp_nistz256_neg,.-ecp_nistz256_neg
___
@@ -172,6 +175,7 @@
push %r13
push %r14
push %r15
+.Lmul_body:
___
$code.=<<___ if ($addx);
cmp \$0x80100, %ecx
@@ -204,12 +208,14 @@
___
$code.=<<___;
.Lmul_mont_done:
- pop %r15
- pop %r14
- pop %r13
- pop %r12
- pop %rbx
- pop %rbp
+ mov 0(%rsp),%r15
+ mov 8(%rsp),%r14
+ mov 16(%rsp),%r13
+ mov 24(%rsp),%r12
+ mov 32(%rsp),%rbx
+ mov 40(%rsp),%rbp
+ lea 48(%rsp),%rsp
+.Lmul_epilogue:
ret
.size ecp_nistz256_mul_mont,.-ecp_nistz256_mul_mont
@@ -453,6 +459,7 @@
push %r13
push %r14
push %r15
+.Lsqr_body:
___
$code.=<<___ if ($addx);
cmp \$0x80100, %ecx
@@ -481,12 +488,14 @@
___
$code.=<<___;
.Lsqr_mont_done:
- pop %r15
- pop %r14
- pop %r13
- pop %r12
- pop %rbx
- pop %rbp
+ mov 0(%rsp),%r15
+ mov 8(%rsp),%r14
+ mov 16(%rsp),%r13
+ mov 24(%rsp),%r12
+ mov 32(%rsp),%rbx
+ mov 40(%rsp),%rbp
+ lea 48(%rsp),%rsp
+.Lsqr_epilogue:
ret
.size ecp_nistz256_sqr_mont,.-ecp_nistz256_sqr_mont
@@ -1051,10 +1060,10 @@
movaps 0x80(%rsp), %xmm14
movaps 0x90(%rsp), %xmm15
lea 0xa8(%rsp), %rsp
-.LSEH_end_ecp_nistz256_select_w5:
___
$code.=<<___;
ret
+.LSEH_end_ecp_nistz256_select_w5:
.size ecp_nistz256_select_w5,.-ecp_nistz256_select_w5
################################################################################
@@ -1138,10 +1147,10 @@
movaps 0x80(%rsp), %xmm14
movaps 0x90(%rsp), %xmm15
lea 0xa8(%rsp), %rsp
-.LSEH_end_ecp_nistz256_select_w7:
___
$code.=<<___;
ret
+.LSEH_end_ecp_nistz256_select_w7:
.size ecp_nistz256_select_w7,.-ecp_nistz256_select_w7
___
}
@@ -1162,18 +1171,19 @@
___
$code.=<<___ if ($win64);
lea -0x88(%rsp), %rax
+ mov %rsp,%r11
.LSEH_begin_ecp_nistz256_avx2_select_w5:
- .byte 0x48,0x8d,0x60,0xe0 #lea -0x20(%rax), %rsp
- .byte 0xc5,0xf8,0x29,0x70,0xe0 #vmovaps %xmm6, -0x20(%rax)
- .byte 0xc5,0xf8,0x29,0x78,0xf0 #vmovaps %xmm7, -0x10(%rax)
- .byte 0xc5,0x78,0x29,0x40,0x00 #vmovaps %xmm8, 8(%rax)
- .byte 0xc5,0x78,0x29,0x48,0x10 #vmovaps %xmm9, 0x10(%rax)
- .byte 0xc5,0x78,0x29,0x50,0x20 #vmovaps %xmm10, 0x20(%rax)
- .byte 0xc5,0x78,0x29,0x58,0x30 #vmovaps %xmm11, 0x30(%rax)
- .byte 0xc5,0x78,0x29,0x60,0x40 #vmovaps %xmm12, 0x40(%rax)
- .byte 0xc5,0x78,0x29,0x68,0x50 #vmovaps %xmm13, 0x50(%rax)
- .byte 0xc5,0x78,0x29,0x70,0x60 #vmovaps %xmm14, 0x60(%rax)
- .byte 0xc5,0x78,0x29,0x78,0x70 #vmovaps %xmm15, 0x70(%rax)
+ .byte 0x48,0x8d,0x60,0xe0 # lea -0x20(%rax), %rsp
+ .byte 0xc5,0xf8,0x29,0x70,0xe0 # vmovaps %xmm6, -0x20(%rax)
+ .byte 0xc5,0xf8,0x29,0x78,0xf0 # vmovaps %xmm7, -0x10(%rax)
+ .byte 0xc5,0x78,0x29,0x40,0x00 # vmovaps %xmm8, 8(%rax)
+ .byte 0xc5,0x78,0x29,0x48,0x10 # vmovaps %xmm9, 0x10(%rax)
+ .byte 0xc5,0x78,0x29,0x50,0x20 # vmovaps %xmm10, 0x20(%rax)
+ .byte 0xc5,0x78,0x29,0x58,0x30 # vmovaps %xmm11, 0x30(%rax)
+ .byte 0xc5,0x78,0x29,0x60,0x40 # vmovaps %xmm12, 0x40(%rax)
+ .byte 0xc5,0x78,0x29,0x68,0x50 # vmovaps %xmm13, 0x50(%rax)
+ .byte 0xc5,0x78,0x29,0x70,0x60 # vmovaps %xmm14, 0x60(%rax)
+ .byte 0xc5,0x78,0x29,0x78,0x70 # vmovaps %xmm15, 0x70(%rax)
___
$code.=<<___;
vmovdqa .LTwo(%rip), $TWO
@@ -1239,11 +1249,11 @@
movaps 0x70(%rsp), %xmm13
movaps 0x80(%rsp), %xmm14
movaps 0x90(%rsp), %xmm15
- lea 0xa8(%rsp), %rsp
-.LSEH_end_ecp_nistz256_avx2_select_w5:
+ lea (%r11), %rsp
___
$code.=<<___;
ret
+.LSEH_end_ecp_nistz256_avx2_select_w5:
.size ecp_nistz256_avx2_select_w5,.-ecp_nistz256_avx2_select_w5
___
}
@@ -1266,19 +1276,20 @@
vzeroupper
___
$code.=<<___ if ($win64);
+ mov %rsp,%r11
lea -0x88(%rsp), %rax
.LSEH_begin_ecp_nistz256_avx2_select_w7:
- .byte 0x48,0x8d,0x60,0xe0 #lea -0x20(%rax), %rsp
- .byte 0xc5,0xf8,0x29,0x70,0xe0 #vmovaps %xmm6, -0x20(%rax)
- .byte 0xc5,0xf8,0x29,0x78,0xf0 #vmovaps %xmm7, -0x10(%rax)
- .byte 0xc5,0x78,0x29,0x40,0x00 #vmovaps %xmm8, 8(%rax)
- .byte 0xc5,0x78,0x29,0x48,0x10 #vmovaps %xmm9, 0x10(%rax)
- .byte 0xc5,0x78,0x29,0x50,0x20 #vmovaps %xmm10, 0x20(%rax)
- .byte 0xc5,0x78,0x29,0x58,0x30 #vmovaps %xmm11, 0x30(%rax)
- .byte 0xc5,0x78,0x29,0x60,0x40 #vmovaps %xmm12, 0x40(%rax)
- .byte 0xc5,0x78,0x29,0x68,0x50 #vmovaps %xmm13, 0x50(%rax)
- .byte 0xc5,0x78,0x29,0x70,0x60 #vmovaps %xmm14, 0x60(%rax)
- .byte 0xc5,0x78,0x29,0x78,0x70 #vmovaps %xmm15, 0x70(%rax)
+ .byte 0x48,0x8d,0x60,0xe0 # lea -0x20(%rax), %rsp
+ .byte 0xc5,0xf8,0x29,0x70,0xe0 # vmovaps %xmm6, -0x20(%rax)
+ .byte 0xc5,0xf8,0x29,0x78,0xf0 # vmovaps %xmm7, -0x10(%rax)
+ .byte 0xc5,0x78,0x29,0x40,0x00 # vmovaps %xmm8, 8(%rax)
+ .byte 0xc5,0x78,0x29,0x48,0x10 # vmovaps %xmm9, 0x10(%rax)
+ .byte 0xc5,0x78,0x29,0x50,0x20 # vmovaps %xmm10, 0x20(%rax)
+ .byte 0xc5,0x78,0x29,0x58,0x30 # vmovaps %xmm11, 0x30(%rax)
+ .byte 0xc5,0x78,0x29,0x60,0x40 # vmovaps %xmm12, 0x40(%rax)
+ .byte 0xc5,0x78,0x29,0x68,0x50 # vmovaps %xmm13, 0x50(%rax)
+ .byte 0xc5,0x78,0x29,0x70,0x60 # vmovaps %xmm14, 0x60(%rax)
+ .byte 0xc5,0x78,0x29,0x78,0x70 # vmovaps %xmm15, 0x70(%rax)
___
$code.=<<___;
vmovdqa .LThree(%rip), $THREE
@@ -1359,11 +1370,11 @@
movaps 0x70(%rsp), %xmm13
movaps 0x80(%rsp), %xmm14
movaps 0x90(%rsp), %xmm15
- lea 0xa8(%rsp), %rsp
-.LSEH_end_ecp_nistz256_avx2_select_w7:
+ lea (%r11), %rsp
___
$code.=<<___;
ret
+.LSEH_end_ecp_nistz256_avx2_select_w7:
.size ecp_nistz256_avx2_select_w7,.-ecp_nistz256_avx2_select_w7
___
} else {
@@ -1595,6 +1606,7 @@
push %r14
push %r15
sub \$32*5+8, %rsp
+.Lpoint_double${x}_body:
.Lpoint_double_shortcut$x:
movdqu 0x00($a_ptr), %xmm0 # copy *(P256_POINT *)$a_ptr.x
@@ -1765,13 +1777,15 @@
movq %xmm1, $r_ptr
call __ecp_nistz256_sub_from$x # p256_sub(res_y, S, res_y);
- add \$32*5+8, %rsp
- pop %r15
- pop %r14
- pop %r13
- pop %r12
- pop %rbx
- pop %rbp
+ lea 32*5+56(%rsp), %rsi
+ mov -48(%rsi),%r15
+ mov -40(%rsi),%r14
+ mov -32(%rsi),%r13
+ mov -24(%rsi),%r12
+ mov -16(%rsi),%rbx
+ mov -8(%rsi),%rbp
+ lea (%rsi),%rsp
+.Lpoint_double${x}_epilogue:
ret
.size ecp_nistz256_point_double$sfx,.-ecp_nistz256_point_double$sfx
___
@@ -1826,6 +1840,7 @@
push %r14
push %r15
sub \$32*18+8, %rsp
+.Lpoint_add${x}_body:
movdqu 0x00($a_ptr), %xmm0 # copy *(P256_POINT *)$a_ptr
movdqu 0x10($a_ptr), %xmm1
@@ -2134,13 +2149,15 @@
movdqu %xmm3, 0x30($r_ptr)
.Ladd_done$x:
- add \$32*18+8, %rsp
- pop %r15
- pop %r14
- pop %r13
- pop %r12
- pop %rbx
- pop %rbp
+ lea 32*18+56(%rsp), %rsi
+ mov -48(%rsi),%r15
+ mov -40(%rsi),%r14
+ mov -32(%rsi),%r13
+ mov -24(%rsi),%r12
+ mov -16(%rsi),%rbx
+ mov -8(%rsi),%rbp
+ lea (%rsi),%rsp
+.Lpoint_add${x}_epilogue:
ret
.size ecp_nistz256_point_add$sfx,.-ecp_nistz256_point_add$sfx
___
@@ -2194,6 +2211,7 @@
push %r14
push %r15
sub \$32*15+8, %rsp
+.Ladd_affine${x}_body:
movdqu 0x00($a_ptr), %xmm0 # copy *(P256_POINT *)$a_ptr
mov $b_org, $b_ptr # reassign
@@ -2438,13 +2456,15 @@
movdqu %xmm2, 0x20($r_ptr)
movdqu %xmm3, 0x30($r_ptr)
- add \$32*15+8, %rsp
- pop %r15
- pop %r14
- pop %r13
- pop %r12
- pop %rbx
- pop %rbp
+ lea 32*15+56(%rsp), %rsi
+ mov -48(%rsi),%r15
+ mov -40(%rsi),%r14
+ mov -32(%rsi),%r13
+ mov -24(%rsi),%r12
+ mov -16(%rsi),%rbx
+ mov -8(%rsi),%rbp
+ lea (%rsi),%rsp
+.Ladd_affine${x}_epilogue:
ret
.size ecp_nistz256_point_add_affine$sfx,.-ecp_nistz256_point_add_affine$sfx
___
@@ -2596,6 +2616,291 @@
}
}}}
+# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
+# CONTEXT *context,DISPATCHER_CONTEXT *disp)
+if ($win64) {
+$rec="%rcx";
+$frame="%rdx";
+$context="%r8";
+$disp="%r9";
+
+$code.=<<___;
+.extern __imp_RtlVirtualUnwind
+
+.type short_handler,\@abi-omnipotent
+.align 16
+short_handler:
+ push %rsi
+ push %rdi
+ push %rbx
+ push %rbp
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ pushfq
+ sub \$64,%rsp
+
+ mov 120($context),%rax # pull context->Rax
+ mov 248($context),%rbx # pull context->Rip
+
+ mov 8($disp),%rsi # disp->ImageBase
+ mov 56($disp),%r11 # disp->HandlerData
+
+ mov 0(%r11),%r10d # HandlerData[0]
+ lea (%rsi,%r10),%r10 # end of prologue label
+ cmp %r10,%rbx # context->Rip<end of prologue label
+ jb .Lcommon_seh_tail
+
+ mov 152($context),%rax # pull context->Rsp
+
+ mov 4(%r11),%r10d # HandlerData[1]
+ lea (%rsi,%r10),%r10 # epilogue label
+ cmp %r10,%rbx # context->Rip>=epilogue label
+ jae .Lcommon_seh_tail
+
+ lea 16(%rax),%rax
+
+ mov -8(%rax),%r12
+ mov -16(%rax),%r13
+ mov %r12,216($context) # restore context->R12
+ mov %r13,224($context) # restore context->R13
+
+ jmp .Lcommon_seh_tail
+.size short_handler,.-short_handler
+
+.type full_handler,\@abi-omnipotent
+.align 16
+full_handler:
+ push %rsi
+ push %rdi
+ push %rbx
+ push %rbp
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ pushfq
+ sub \$64,%rsp
+
+ mov 120($context),%rax # pull context->Rax
+ mov 248($context),%rbx # pull context->Rip
+
+ mov 8($disp),%rsi # disp->ImageBase
+ mov 56($disp),%r11 # disp->HandlerData
+
+ mov 0(%r11),%r10d # HandlerData[0]
+ lea (%rsi,%r10),%r10 # end of prologue label
+ cmp %r10,%rbx # context->Rip<end of prologue label
+ jb .Lcommon_seh_tail
+
+ mov 152($context),%rax # pull context->Rsp
+
+ mov 4(%r11),%r10d # HandlerData[1]
+ lea (%rsi,%r10),%r10 # epilogue label
+ cmp %r10,%rbx # context->Rip>=epilogue label
+ jae .Lcommon_seh_tail
+
+ mov 8(%r11),%r10d # HandlerData[2]
+ lea (%rax,%r10),%rax
+
+ mov -8(%rax),%rbp
+ mov -16(%rax),%rbx
+ mov -24(%rax),%r12
+ mov -32(%rax),%r13
+ mov -40(%rax),%r14
+ mov -48(%rax),%r15
+ mov %rbx,144($context) # restore context->Rbx
+ mov %rbp,160($context) # restore context->Rbp
+ mov %r12,216($context) # restore context->R12
+ mov %r13,224($context) # restore context->R13
+ mov %r14,232($context) # restore context->R14
+ mov %r15,240($context) # restore context->R15
+
+.Lcommon_seh_tail:
+ mov 8(%rax),%rdi
+ mov 16(%rax),%rsi
+ mov %rax,152($context) # restore context->Rsp
+ mov %rsi,168($context) # restore context->Rsi
+ mov %rdi,176($context) # restore context->Rdi
+
+ mov 40($disp),%rdi # disp->ContextRecord
+ mov $context,%rsi # context
+ mov \$154,%ecx # sizeof(CONTEXT)
+ .long 0xa548f3fc # cld; rep movsq
+
+ mov $disp,%rsi
+ xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER
+ mov 8(%rsi),%rdx # arg2, disp->ImageBase
+ mov 0(%rsi),%r8 # arg3, disp->ControlPc
+ mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
+ mov 40(%rsi),%r10 # disp->ContextRecord
+ lea 56(%rsi),%r11 # &disp->HandlerData
+ lea 24(%rsi),%r12 # &disp->EstablisherFrame
+ mov %r10,32(%rsp) # arg5
+ mov %r11,40(%rsp) # arg6
+ mov %r12,48(%rsp) # arg7
+ mov %rcx,56(%rsp) # arg8, (NULL)
+ call *__imp_RtlVirtualUnwind(%rip)
+
+ mov \$1,%eax # ExceptionContinueSearch
+ add \$64,%rsp
+ popfq
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbp
+ pop %rbx
+ pop %rdi
+ pop %rsi
+ ret
+.size full_handler,.-full_handler
+
+.section .pdata
+.align 4
+ .rva .LSEH_begin_ecp_nistz256_neg
+ .rva .LSEH_end_ecp_nistz256_neg
+ .rva .LSEH_info_ecp_nistz256_neg
+
+ .rva .LSEH_begin_ecp_nistz256_mul_mont
+ .rva .LSEH_end_ecp_nistz256_mul_mont
+ .rva .LSEH_info_ecp_nistz256_mul_mont
+
+ .rva .LSEH_begin_ecp_nistz256_sqr_mont
+ .rva .LSEH_end_ecp_nistz256_sqr_mont
+ .rva .LSEH_info_ecp_nistz256_sqr_mont
+
+ .rva .LSEH_begin_ecp_nistz256_select_w5
+ .rva .LSEH_end_ecp_nistz256_select_w5
+ .rva .LSEH_info_ecp_nistz256_select_wX
+
+ .rva .LSEH_begin_ecp_nistz256_select_w7
+ .rva .LSEH_end_ecp_nistz256_select_w7
+ .rva .LSEH_info_ecp_nistz256_select_wX
+___
+$code.=<<___ if ($avx>1);
+ .rva .LSEH_begin_ecp_nistz256_avx2_select_w5
+ .rva .LSEH_end_ecp_nistz256_avx2_select_w5
+ .rva .LSEH_info_ecp_nistz256_avx2_select_wX
+
+ .rva .LSEH_begin_ecp_nistz256_avx2_select_w7
+ .rva .LSEH_end_ecp_nistz256_avx2_select_w7
+ .rva .LSEH_info_ecp_nistz256_avx2_select_wX
+___
+$code.=<<___;
+ .rva .LSEH_begin_ecp_nistz256_point_double
+ .rva .LSEH_end_ecp_nistz256_point_double
+ .rva .LSEH_info_ecp_nistz256_point_double
+
+ .rva .LSEH_begin_ecp_nistz256_point_add
+ .rva .LSEH_end_ecp_nistz256_point_add
+ .rva .LSEH_info_ecp_nistz256_point_add
+
+ .rva .LSEH_begin_ecp_nistz256_point_add_affine
+ .rva .LSEH_end_ecp_nistz256_point_add_affine
+ .rva .LSEH_info_ecp_nistz256_point_add_affine
+___
+$code.=<<___ if ($addx);
+ .rva .LSEH_begin_ecp_nistz256_point_doublex
+ .rva .LSEH_end_ecp_nistz256_point_doublex
+ .rva .LSEH_info_ecp_nistz256_point_doublex
+
+ .rva .LSEH_begin_ecp_nistz256_point_addx
+ .rva .LSEH_end_ecp_nistz256_point_addx
+ .rva .LSEH_info_ecp_nistz256_point_addx
+
+ .rva .LSEH_begin_ecp_nistz256_point_add_affinex
+ .rva .LSEH_end_ecp_nistz256_point_add_affinex
+ .rva .LSEH_info_ecp_nistz256_point_add_affinex
+___
+$code.=<<___;
+
+.section .xdata
+.align 8
+.LSEH_info_ecp_nistz256_neg:
+ .byte 9,0,0,0
+ .rva short_handler
+ .rva .Lneg_body,.Lneg_epilogue # HandlerData[]
+.LSEH_info_ecp_nistz256_mul_mont:
+ .byte 9,0,0,0
+ .rva full_handler
+ .rva .Lmul_body,.Lmul_epilogue # HandlerData[]
+ .long 48,0
+.LSEH_info_ecp_nistz256_sqr_mont:
+ .byte 9,0,0,0
+ .rva full_handler
+ .rva .Lsqr_body,.Lsqr_epilogue # HandlerData[]
+ .long 48,0
+.LSEH_info_ecp_nistz256_select_wX:
+ .byte 0x01,0x33,0x16,0x00
+ .byte 0x33,0xf8,0x09,0x00 #movaps 0x90(rsp),xmm15
+ .byte 0x2e,0xe8,0x08,0x00 #movaps 0x80(rsp),xmm14
+ .byte 0x29,0xd8,0x07,0x00 #movaps 0x70(rsp),xmm13
+ .byte 0x24,0xc8,0x06,0x00 #movaps 0x60(rsp),xmm12
+ .byte 0x1f,0xb8,0x05,0x00 #movaps 0x50(rsp),xmm11
+ .byte 0x1a,0xa8,0x04,0x00 #movaps 0x40(rsp),xmm10
+ .byte 0x15,0x98,0x03,0x00 #movaps 0x30(rsp),xmm9
+ .byte 0x10,0x88,0x02,0x00 #movaps 0x20(rsp),xmm8
+ .byte 0x0c,0x78,0x01,0x00 #movaps 0x10(rsp),xmm7
+ .byte 0x08,0x68,0x00,0x00 #movaps 0x00(rsp),xmm6
+ .byte 0x04,0x01,0x15,0x00 #sub rsp,0xa8
+ .align 8
+___
+$code.=<<___ if ($avx>1);
+.LSEH_info_ecp_nistz256_avx2_select_wX:
+ .byte 0x01,0x36,0x17,0x0b
+ .byte 0x36,0xf8,0x09,0x00 # vmovaps 0x90(rsp),xmm15
+ .byte 0x31,0xe8,0x08,0x00 # vmovaps 0x80(rsp),xmm14
+ .byte 0x2c,0xd8,0x07,0x00 # vmovaps 0x70(rsp),xmm13
+ .byte 0x27,0xc8,0x06,0x00 # vmovaps 0x60(rsp),xmm12
+ .byte 0x22,0xb8,0x05,0x00 # vmovaps 0x50(rsp),xmm11
+ .byte 0x1d,0xa8,0x04,0x00 # vmovaps 0x40(rsp),xmm10
+ .byte 0x18,0x98,0x03,0x00 # vmovaps 0x30(rsp),xmm9
+ .byte 0x13,0x88,0x02,0x00 # vmovaps 0x20(rsp),xmm8
+ .byte 0x0e,0x78,0x01,0x00 # vmovaps 0x10(rsp),xmm7
+ .byte 0x09,0x68,0x00,0x00 # vmovaps 0x00(rsp),xmm6
+ .byte 0x04,0x01,0x15,0x00 # sub rsp,0xa8
+ .byte 0x00,0xb3,0x00,0x00 # set_frame r11
+ .align 8
+___
+$code.=<<___;
+.LSEH_info_ecp_nistz256_point_double:
+ .byte 9,0,0,0
+ .rva full_handler
+ .rva .Lpoint_doubleq_body,.Lpoint_doubleq_epilogue # HandlerData[]
+ .long 32*5+56,0
+.LSEH_info_ecp_nistz256_point_add:
+ .byte 9,0,0,0
+ .rva full_handler
+ .rva .Lpoint_addq_body,.Lpoint_addq_epilogue # HandlerData[]
+ .long 32*18+56,0
+.LSEH_info_ecp_nistz256_point_add_affine:
+ .byte 9,0,0,0
+ .rva full_handler
+ .rva .Ladd_affineq_body,.Ladd_affineq_epilogue # HandlerData[]
+ .long 32*15+56,0
+___
+$code.=<<___ if ($addx);
+.align 8
+.LSEH_info_ecp_nistz256_point_doublex:
+ .byte 9,0,0,0
+ .rva full_handler
+ .rva .Lpoint_doublex_body,.Lpoint_doublex_epilogue # HandlerData[]
+ .long 32*5+56,0
+.LSEH_info_ecp_nistz256_point_addx:
+ .byte 9,0,0,0
+ .rva full_handler
+ .rva .Lpoint_addx_body,.Lpoint_addx_epilogue # HandlerData[]
+ .long 32*18+56,0
+.LSEH_info_ecp_nistz256_point_add_affinex:
+ .byte 9,0,0,0
+ .rva full_handler
+ .rva .Ladd_affinex_body,.Ladd_affinex_epilogue # HandlerData[]
+ .long 32*15+56,0
+___
+}
+
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;
close STDOUT;