Maintain a frame pointer in aesni-gcm-x86_64.pl and add SEH unwind codes
Some profiling systems cannot unwind with CFI and benefit from having a
frame pointer. Since this code doesn't have enough register pressure to
actually need to use rbp as a general register, this change tweaks
things so that a frame pointer is preserved.
As this would invalidate the SEH handler, just replace it with proper
unwind codes, which are more profiler-friendly and supportable by our
unwind tests. Some notes on this:
- We don't currently support the automatic calling convention conversion
with unwind codes, but this file already puts all arguments in
registers, so I just renamed the arguments and put the last two
arguments in RDI and RSI. Those I stashed into the parameter stack
area because it's free storage.
- It is tedious to write the same directives in both CFI and SEH. We
really could do with an abstraction, although most of our functions
need a Windows variation anyway.
- I restored the original file's use of PUSH to save the registers.
This matches what Clang likes to output anyway, and push is probably
smaller than the corresponding move with offset. (And it reduces how
much thinking about offsets I need to do.)
- Although it's an extra instruction, I restored the original file's
separate fixed stack allocation and alloca for the sake of clarity.
- The epilog is constrained by Windows being extremely picky about
epilogs. (Windows doesn't annotate epilogs and instead simulates
forward.) I think other options are possible, but using LEA with an
offset to realign the stack for the POPs both matches the examples in
Windows and what Clang seems to like to output. The original file used
MOV with offset, but it seems to be related to the funny SEH handler.
- The offsets in SEH directives may be surprising to someone used to CFI
directives or a SysV RBP frame pointer. All three use slightly
different baselines:
CFI's canonical frame address (CFA) is RSP just before a CALL (so
before the saved RIP in stack order). It is 16-byte aligned by ABI.
A SysV RBP frame pointer is 16 bytes after that, after a saved RIP and
saved RBP. It is also 16-byte aligned.
Windows' baseline is the top of the fixed stack allocation, so
potentially some bytes after that (all pushreg and allocstack
directives). This too is required to be 16-byte aligned.
Windows, however, doesn't require the frame register to actually point
at the top of the fixed stack allocation. You can specify an offset
from the value in the register to the actual top. But all the offsets
in savereg, etc., directives use this baseline.
Performance difference is within measurement noise.
This does not create a stack frame for internal functions so
frame-pointer unwinding may miss a function or two, but the broad
attribution will be correct.
Change originally by Clemens Fruhwirth. Then reworked from Adam
Langley's https://boringssl-review.googlesource.com/c/boringssl/+/55945
by me to work on Windows and fix up some issues with the RBP setup.
Bug: b/33072965, 259
Change-Id: I52302635a8ad3d9272404feac125e2a4a4a5d14c
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/56128
Reviewed-by: Adam Langley <agl@google.com>
Commit-Queue: David Benjamin <davidben@google.com>
diff --git a/crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl b/crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl
index 21dbf69..e726c04 100644
--- a/crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl
+++ b/crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl
@@ -71,14 +71,18 @@
# no AVX2 instructions being used.
if ($avx>1) {{{
-($inp,$out,$len,$key,$ivp,$Xip)=("%rdi","%rsi","%rdx","%rcx","%r8","%r9");
+# On Windows, only four parameters are passed in registers. The last two
+# parameters will be manually loaded into %rdi and %rsi.
+my ($inp, $out, $len, $key, $ivp, $Xip) =
+ $win64 ? ("%rcx", "%rdx", "%r8", "%r9", "%rdi", "%rsi") :
+ ("%rdi", "%rsi", "%rdx", "%rcx", "%r8", "%r9");
($Ii,$T1,$T2,$Hkey,
$Z0,$Z1,$Z2,$Z3,$Xi) = map("%xmm$_",(0..8));
($inout0,$inout1,$inout2,$inout3,$inout4,$inout5,$rndkey) = map("%xmm$_",(9..15));
-($counter,$rounds,$ret,$const,$in0,$end0)=("%ebx","%ebp","%r10","%r11","%r14","%r15");
+($counter,$rounds,$const,$in0,$end0)=("%ebx","%r10d","%r11","%r14","%r15");
$code=<<___;
.text
@@ -390,7 +394,7 @@
vaesenclast $Hkey,$inout5,$inout5
vpaddb $T2,$Z3,$Hkey
- add \$0x60,$ret
+ add \$0x60,%rax
sub \$0x6,$len
jc .L6x_done
@@ -424,46 +428,76 @@
# struct { u128 Xi,H,Htbl[9]; } *Xip);
$code.=<<___;
.globl aesni_gcm_decrypt
-.type aesni_gcm_decrypt,\@function,6
+.type aesni_gcm_decrypt,\@abi-omnipotent
.align 32
aesni_gcm_decrypt:
.cfi_startproc
- xor $ret,$ret
+.seh_startproc
+ xor %rax,%rax
# We call |_aesni_ctr32_ghash_6x|, which requires at least 96 (0x60)
# bytes of input.
cmp \$0x60,$len # minimal accepted length
jb .Lgcm_dec_abort
- lea (%rsp),%rax # save stack pointer
-.cfi_def_cfa_register %rax
- push %rbx
-.cfi_push %rbx
push %rbp
.cfi_push %rbp
+.seh_pushreg %rbp
+ mov %rsp, %rbp # save stack pointer
+.cfi_def_cfa_register %rbp
+ push %rbx
+.cfi_push %rbx
+.seh_pushreg %rbx
push %r12
.cfi_push %r12
+.seh_pushreg %r12
push %r13
.cfi_push %r13
+.seh_pushreg %r13
push %r14
.cfi_push %r14
+.seh_pushreg %r14
push %r15
.cfi_push %r15
+.seh_pushreg %r15
___
-$code.=<<___ if ($win64);
- lea -0xa8(%rsp),%rsp
- movaps %xmm6,-0xd8(%rax)
- movaps %xmm7,-0xc8(%rax)
- movaps %xmm8,-0xb8(%rax)
- movaps %xmm9,-0xa8(%rax)
- movaps %xmm10,-0x98(%rax)
- movaps %xmm11,-0x88(%rax)
- movaps %xmm12,-0x78(%rax)
- movaps %xmm13,-0x68(%rax)
- movaps %xmm14,-0x58(%rax)
- movaps %xmm15,-0x48(%rax)
-.Lgcm_dec_body:
+if ($win64) {
+$code.=<<___
+ lea -0xa8(%rsp),%rsp # 8 extra bytes to align the stack
+.seh_allocstack 0xa8
+.seh_setframe %rbp, 0xa8+5*8
+ # Load the last two parameters. These go into %rdi and %rsi, which are
+ # non-volatile on Windows, so stash them in the parameter stack area
+ # first.
+ mov %rdi, 0x10(%rbp)
+.seh_savereg %rdi, 0xa8+5*8+0x10
+ mov %rsi, 0x18(%rbp)
+.seh_savereg %rsi, 0xa8+5*8+0x18
+ mov 0x30(%rbp), $ivp
+ mov 0x38(%rbp), $Xip
+ # Save non-volatile XMM registers.
+ movaps %xmm6,-0xd0(%rbp)
+.seh_savexmm128 %xmm6, 0xa8+5*8-0xd0
+ movaps %xmm7,-0xc0(%rbp)
+.seh_savexmm128 %xmm7, 0xa8+5*8-0xc0
+ movaps %xmm8,-0xb0(%rbp)
+.seh_savexmm128 %xmm8, 0xa8+5*8-0xb0
+ movaps %xmm9,-0xa0(%rbp)
+.seh_savexmm128 %xmm9, 0xa8+5*8-0xa0
+ movaps %xmm10,-0x90(%rbp)
+.seh_savexmm128 %xmm10, 0xa8+5*8-0x90
+ movaps %xmm11,-0x80(%rbp)
+.seh_savexmm128 %xmm11, 0xa8+5*8-0x80
+ movaps %xmm12,-0x70(%rbp)
+.seh_savexmm128 %xmm12, 0xa8+5*8-0x70
+ movaps %xmm13,-0x60(%rbp)
+.seh_savexmm128 %xmm13, 0xa8+5*8-0x60
+ movaps %xmm14,-0x50(%rbp)
+.seh_savexmm128 %xmm14, 0xa8+5*8-0x50
+ movaps %xmm15,-0x40(%rbp)
+.seh_savexmm128 %xmm15, 0xa8+5*8-0x40
___
+}
$code.=<<___;
vzeroupper
@@ -491,7 +525,7 @@
.Ldec_no_key_aliasing:
vmovdqu 0x50($inp),$Z3 # I[5]
- lea ($inp),$in0
+ mov $inp,$in0
vmovdqu 0x40($inp),$Z0
# |_aesni_ctr32_ghash_6x| requires |$end0| to point to 2*96 (0xc0)
@@ -504,7 +538,7 @@
vmovdqu 0x30($inp),$Z1
shr \$4,$len
- xor $ret,$ret
+ xor %rax,%rax
vmovdqu 0x20($inp),$Z2
vpshufb $Ii,$Z3,$Z3 # passed to _aesni_ctr32_ghash_6x
vmovdqu 0x10($inp),$T2
@@ -535,35 +569,37 @@
vzeroupper
___
$code.=<<___ if ($win64);
- movaps -0xd8(%rax),%xmm6
- movaps -0xc8(%rax),%xmm7
- movaps -0xb8(%rax),%xmm8
- movaps -0xa8(%rax),%xmm9
- movaps -0x98(%rax),%xmm10
- movaps -0x88(%rax),%xmm11
- movaps -0x78(%rax),%xmm12
- movaps -0x68(%rax),%xmm13
- movaps -0x58(%rax),%xmm14
- movaps -0x48(%rax),%xmm15
+ movaps -0xd0(%rbp),%xmm6
+ movaps -0xc0(%rbp),%xmm7
+ movaps -0xb0(%rbp),%xmm8
+ movaps -0xa0(%rbp),%xmm9
+ movaps -0x90(%rbp),%xmm10
+ movaps -0x80(%rbp),%xmm11
+ movaps -0x70(%rbp),%xmm12
+ movaps -0x60(%rbp),%xmm13
+ movaps -0x50(%rbp),%xmm14
+ movaps -0x40(%rbp),%xmm15
+ mov 0x10(%rbp),%rdi
+ mov 0x18(%rbp),%rsi
___
$code.=<<___;
- mov -48(%rax),%r15
-.cfi_restore %r15
- mov -40(%rax),%r14
-.cfi_restore %r14
- mov -32(%rax),%r13
-.cfi_restore %r13
- mov -24(%rax),%r12
-.cfi_restore %r12
- mov -16(%rax),%rbp
-.cfi_restore %rbp
- mov -8(%rax),%rbx
-.cfi_restore %rbx
- lea (%rax),%rsp # restore %rsp
-.cfi_def_cfa_register %rsp
+ lea -0x28(%rbp), %rsp # restore %rsp to fixed allocation
+.cfi_def_cfa %rsp, 0x38
+ pop %r15
+.cfi_pop %r15
+ pop %r14
+.cfi_pop %r14
+ pop %r13
+.cfi_pop %r13
+ pop %r12
+.cfi_pop %r12
+ pop %rbx
+.cfi_pop %rbx
+ pop %rbp
+.cfi_pop %rbp
.Lgcm_dec_abort:
- mov $ret,%rax # return value
ret
+.seh_endproc
.cfi_endproc
.size aesni_gcm_decrypt,.-aesni_gcm_decrypt
___
@@ -663,15 +699,16 @@
.size _aesni_ctr32_6x,.-_aesni_ctr32_6x
.globl aesni_gcm_encrypt
-.type aesni_gcm_encrypt,\@function,6
+.type aesni_gcm_encrypt,\@abi-omnipotent
.align 32
aesni_gcm_encrypt:
.cfi_startproc
+.seh_startproc
#ifdef BORINGSSL_DISPATCH_TEST
.extern BORINGSSL_function_hit
movb \$1,BORINGSSL_function_hit+2(%rip)
#endif
- xor $ret,$ret
+ xor %rax,%rax
# We call |_aesni_ctr32_6x| twice, each call consuming 96 bytes of
# input. Then we call |_aesni_ctr32_ghash_6x|, which requires at
@@ -679,35 +716,64 @@
cmp \$0x60*3,$len # minimal accepted length
jb .Lgcm_enc_abort
- lea (%rsp),%rax # save stack pointer
-.cfi_def_cfa_register %rax
- push %rbx
-.cfi_push %rbx
push %rbp
.cfi_push %rbp
+.seh_pushreg %rbp
+ mov %rsp, %rbp # save stack pointer
+.cfi_def_cfa_register %rbp
+ push %rbx
+.cfi_push %rbx
+.seh_pushreg %rbx
push %r12
.cfi_push %r12
+.seh_pushreg %r12
push %r13
.cfi_push %r13
+.seh_pushreg %r13
push %r14
.cfi_push %r14
+.seh_pushreg %r14
push %r15
.cfi_push %r15
+.seh_pushreg %r15
___
-$code.=<<___ if ($win64);
- lea -0xa8(%rsp),%rsp
- movaps %xmm6,-0xd8(%rax)
- movaps %xmm7,-0xc8(%rax)
- movaps %xmm8,-0xb8(%rax)
- movaps %xmm9,-0xa8(%rax)
- movaps %xmm10,-0x98(%rax)
- movaps %xmm11,-0x88(%rax)
- movaps %xmm12,-0x78(%rax)
- movaps %xmm13,-0x68(%rax)
- movaps %xmm14,-0x58(%rax)
- movaps %xmm15,-0x48(%rax)
-.Lgcm_enc_body:
+if ($win64) {
+$code.=<<___
+ lea -0xa8(%rsp),%rsp # 8 extra bytes to align the stack
+.seh_allocstack 0xa8
+.seh_setframe %rbp, 0xa8+5*8
+ # Load the last two parameters. These go into %rdi and %rsi, which are
+ # non-volatile on Windows, so stash them in the parameter stack area
+ # first.
+ mov %rdi, 0x10(%rbp)
+.seh_savereg %rdi, 0xa8+5*8+0x10
+ mov %rsi, 0x18(%rbp)
+.seh_savereg %rsi, 0xa8+5*8+0x18
+ mov 0x30(%rbp), $ivp
+ mov 0x38(%rbp), $Xip
+ # Save non-volatile XMM registers.
+ movaps %xmm6,-0xd0(%rbp)
+.seh_savexmm128 %xmm6, 0xa8+5*8-0xd0
+ movaps %xmm7,-0xc0(%rbp)
+.seh_savexmm128 %xmm7, 0xa8+5*8-0xc0
+ movaps %xmm8,-0xb0(%rbp)
+.seh_savexmm128 %xmm8, 0xa8+5*8-0xb0
+ movaps %xmm9,-0xa0(%rbp)
+.seh_savexmm128 %xmm9, 0xa8+5*8-0xa0
+ movaps %xmm10,-0x90(%rbp)
+.seh_savexmm128 %xmm10, 0xa8+5*8-0x90
+ movaps %xmm11,-0x80(%rbp)
+.seh_savexmm128 %xmm11, 0xa8+5*8-0x80
+ movaps %xmm12,-0x70(%rbp)
+.seh_savexmm128 %xmm12, 0xa8+5*8-0x70
+ movaps %xmm13,-0x60(%rbp)
+.seh_savexmm128 %xmm13, 0xa8+5*8-0x60
+ movaps %xmm14,-0x50(%rbp)
+.seh_savexmm128 %xmm14, 0xa8+5*8-0x50
+ movaps %xmm15,-0x40(%rbp)
+.seh_savexmm128 %xmm15, 0xa8+5*8-0x40
___
+}
$code.=<<___;
vzeroupper
@@ -731,7 +797,7 @@
sub $end0,%rsp # avoid aliasing with key
.Lenc_no_key_aliasing:
- lea ($out),$in0
+ mov $out,$in0
# |_aesni_ctr32_ghash_6x| requires |$end0| to point to 2*96 (0xc0)
# bytes before the end of the input. Note, in particular, that this is
@@ -762,7 +828,7 @@
vmovdqu ($Xip),$Xi # load Xi
lea 0x20+0x20($Xip),$Xip # size optimization
sub \$12,$len
- mov \$0x60*2,$ret
+ mov \$0x60*2,%rax
vpshufb $Ii,$Xi,$Xi
call _aesni_ctr32_ghash_6x
@@ -951,37 +1017,39 @@
vzeroupper
___
$code.=<<___ if ($win64);
- movaps -0xd8(%rax),%xmm6
- movaps -0xc8(%rax),%xmm7
- movaps -0xb8(%rax),%xmm8
- movaps -0xa8(%rax),%xmm9
- movaps -0x98(%rax),%xmm10
- movaps -0x88(%rax),%xmm11
- movaps -0x78(%rax),%xmm12
- movaps -0x68(%rax),%xmm13
- movaps -0x58(%rax),%xmm14
- movaps -0x48(%rax),%xmm15
+ movaps -0xd0(%rbp),%xmm6
+ movaps -0xc0(%rbp),%xmm7
+ movaps -0xb0(%rbp),%xmm8
+ movaps -0xa0(%rbp),%xmm9
+ movaps -0x90(%rbp),%xmm10
+ movaps -0x80(%rbp),%xmm11
+ movaps -0x70(%rbp),%xmm12
+ movaps -0x60(%rbp),%xmm13
+ movaps -0x50(%rbp),%xmm14
+ movaps -0x40(%rbp),%xmm15
+ mov 0x10(%rbp),%rdi
+ mov 0x18(%rbp),%rsi
___
$code.=<<___;
- mov -48(%rax),%r15
-.cfi_restore %r15
- mov -40(%rax),%r14
-.cfi_restore %r14
- mov -32(%rax),%r13
-.cfi_restore %r13
- mov -24(%rax),%r12
-.cfi_restore %r12
- mov -16(%rax),%rbp
-.cfi_restore %rbp
- mov -8(%rax),%rbx
-.cfi_restore %rbx
- lea (%rax),%rsp # restore %rsp
-.cfi_def_cfa_register %rsp
+ lea -0x28(%rbp), %rsp # restore %rsp to fixed allocation
+.cfi_def_cfa %rsp, 0x38
+ pop %r15
+.cfi_pop %r15
+ pop %r14
+.cfi_pop %r14
+ pop %r13
+.cfi_pop %r13
+ pop %r12
+.cfi_pop %r12
+ pop %rbx
+.cfi_pop %rbx
+ pop %rbp
+.cfi_pop %rbp
.Lgcm_enc_abort:
- mov $ret,%rax # return value
ret
+.seh_endproc
.cfi_endproc
.size aesni_gcm_encrypt,.-aesni_gcm_encrypt
___
$code.=<<___;
@@ -999,127 +1067,6 @@
.asciz "AES-NI GCM module for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
.align 64
___
-if ($win64) {
-$rec="%rcx";
-$frame="%rdx";
-$context="%r8";
-$disp="%r9";
-
-$code.=<<___
-.extern __imp_RtlVirtualUnwind
-.type gcm_se_handler,\@abi-omnipotent
-.align 16
-gcm_se_handler:
- push %rsi
- push %rdi
- push %rbx
- push %rbp
- push %r12
- push %r13
- push %r14
- push %r15
- pushfq
- sub \$64,%rsp
-
- mov 120($context),%rax # pull context->Rax
- mov 248($context),%rbx # pull context->Rip
-
- mov 8($disp),%rsi # disp->ImageBase
- mov 56($disp),%r11 # disp->HandlerData
-
- mov 0(%r11),%r10d # HandlerData[0]
- lea (%rsi,%r10),%r10 # prologue label
- cmp %r10,%rbx # context->Rip<prologue label
- jb .Lcommon_seh_tail
-
- mov 152($context),%rax # pull context->Rsp
-
- mov 4(%r11),%r10d # HandlerData[1]
- lea (%rsi,%r10),%r10 # epilogue label
- cmp %r10,%rbx # context->Rip>=epilogue label
- jae .Lcommon_seh_tail
-
- mov 120($context),%rax # pull context->Rax
-
- mov -48(%rax),%r15
- mov -40(%rax),%r14
- mov -32(%rax),%r13
- mov -24(%rax),%r12
- mov -16(%rax),%rbp
- mov -8(%rax),%rbx
- mov %r15,240($context)
- mov %r14,232($context)
- mov %r13,224($context)
- mov %r12,216($context)
- mov %rbp,160($context)
- mov %rbx,144($context)
-
- lea -0xd8(%rax),%rsi # %xmm save area
- lea 512($context),%rdi # & context.Xmm6
- mov \$20,%ecx # 10*sizeof(%xmm0)/sizeof(%rax)
- .long 0xa548f3fc # cld; rep movsq
-
-.Lcommon_seh_tail:
- mov 8(%rax),%rdi
- mov 16(%rax),%rsi
- mov %rax,152($context) # restore context->Rsp
- mov %rsi,168($context) # restore context->Rsi
- mov %rdi,176($context) # restore context->Rdi
-
- mov 40($disp),%rdi # disp->ContextRecord
- mov $context,%rsi # context
- mov \$154,%ecx # sizeof(CONTEXT)
- .long 0xa548f3fc # cld; rep movsq
-
- mov $disp,%rsi
- xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER
- mov 8(%rsi),%rdx # arg2, disp->ImageBase
- mov 0(%rsi),%r8 # arg3, disp->ControlPc
- mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
- mov 40(%rsi),%r10 # disp->ContextRecord
- lea 56(%rsi),%r11 # &disp->HandlerData
- lea 24(%rsi),%r12 # &disp->EstablisherFrame
- mov %r10,32(%rsp) # arg5
- mov %r11,40(%rsp) # arg6
- mov %r12,48(%rsp) # arg7
- mov %rcx,56(%rsp) # arg8, (NULL)
- call *__imp_RtlVirtualUnwind(%rip)
-
- mov \$1,%eax # ExceptionContinueSearch
- add \$64,%rsp
- popfq
- pop %r15
- pop %r14
- pop %r13
- pop %r12
- pop %rbp
- pop %rbx
- pop %rdi
- pop %rsi
- ret
-.size gcm_se_handler,.-gcm_se_handler
-
-.section .pdata
-.align 4
- .rva .LSEH_begin_aesni_gcm_decrypt
- .rva .LSEH_end_aesni_gcm_decrypt
- .rva .LSEH_gcm_dec_info
-
- .rva .LSEH_begin_aesni_gcm_encrypt
- .rva .LSEH_end_aesni_gcm_encrypt
- .rva .LSEH_gcm_enc_info
-.section .xdata
-.align 8
-.LSEH_gcm_dec_info:
- .byte 9,0,0,0
- .rva gcm_se_handler
- .rva .Lgcm_dec_body,.Lgcm_dec_abort
-.LSEH_gcm_enc_info:
- .byte 9,0,0,0
- .rva gcm_se_handler
- .rva .Lgcm_enc_body,.Lgcm_enc_abort
-___
-}
}}} else {{{
$code=<<___; # assembler is too old
.text
diff --git a/crypto/fipsmodule/modes/gcm_test.cc b/crypto/fipsmodule/modes/gcm_test.cc
index 8d15cc6..324d0e8 100644
--- a/crypto/fipsmodule/modes/gcm_test.cc
+++ b/crypto/fipsmodule/modes/gcm_test.cc
@@ -173,17 +173,17 @@
aes_hw_set_encrypt_key(kKey, 128, &aes_key);
for (size_t blocks : kBlockCounts) {
- CHECK_ABI(aesni_gcm_encrypt, buf, buf, blocks * 16, &aes_key, iv,
- gcm.Xi.u);
- CHECK_ABI(aesni_gcm_encrypt, buf, buf, blocks * 16 + 7, &aes_key, iv,
- gcm.Xi.u);
+ CHECK_ABI_SEH(aesni_gcm_encrypt, buf, buf, blocks * 16, &aes_key, iv,
+ gcm.Xi.u);
+ CHECK_ABI_SEH(aesni_gcm_encrypt, buf, buf, blocks * 16 + 7, &aes_key,
+ iv, gcm.Xi.u);
}
aes_hw_set_decrypt_key(kKey, 128, &aes_key);
for (size_t blocks : kBlockCounts) {
- CHECK_ABI(aesni_gcm_decrypt, buf, buf, blocks * 16, &aes_key, iv,
- gcm.Xi.u);
- CHECK_ABI(aesni_gcm_decrypt, buf, buf, blocks * 16 + 7, &aes_key, iv,
- gcm.Xi.u);
+ CHECK_ABI_SEH(aesni_gcm_decrypt, buf, buf, blocks * 16, &aes_key, iv,
+ gcm.Xi.u);
+ CHECK_ABI_SEH(aesni_gcm_decrypt, buf, buf, blocks * 16 + 7, &aes_key,
+ iv, gcm.Xi.u);
}
}
}