Add AES ABI tests. This involves fixing some bugs in aes_nohw_cbc_encrypt's annotations, and working around a libunwind bug. In doing so, support .cfi_remember_state and .cfi_restore_state in perlasm. Change-Id: Iaedfe691356b0468327a6be0958d034dafa760e5 Reviewed-on: https://boringssl-review.googlesource.com/c/34189 Reviewed-by: Adam Langley <agl@google.com> Commit-Queue: Adam Langley <agl@google.com>
diff --git a/crypto/fipsmodule/aes/aes_test.cc b/crypto/fipsmodule/aes/aes_test.cc index ccec597..a0c9411 100644 --- a/crypto/fipsmodule/aes/aes_test.cc +++ b/crypto/fipsmodule/aes/aes_test.cc
@@ -23,7 +23,9 @@ #include <openssl/aes.h> +#include "internal.h" #include "../../internal.h" +#include "../../test/abi_test.h" #include "../../test/file_test.h" #include "../../test/test_util.h" #include "../../test/wycheproof_util.h" @@ -186,3 +188,105 @@ AES_wrap_key(&aes, nullptr, out.data(), in.data(), in.size())); } } + +#if defined(SUPPORTS_ABI_TEST) +TEST(AESTest, ABI) { + for (int bits : {128, 192, 256}) { + SCOPED_TRACE(bits); + const uint8_t kKey[256/8] = {0}; + AES_KEY key; + uint8_t block[AES_BLOCK_SIZE]; + uint8_t buf[AES_BLOCK_SIZE * 64] = {0}; + std::vector<int> block_counts; + if (bits == 128) { + block_counts = {0, 1, 2, 3, 4, 8, 16, 31}; + } else { + // Unwind tests are very slow. Assume that the various input sizes do not + // differ significantly by round count for ABI purposes. + block_counts = {0, 1, 8}; + } + + CHECK_ABI(aes_nohw_set_encrypt_key, kKey, bits, &key); + CHECK_ABI(aes_nohw_encrypt, block, block, &key); +#if defined(AES_NOHW_CBC) + for (size_t blocks : block_counts) { + SCOPED_TRACE(blocks); + CHECK_ABI(aes_nohw_cbc_encrypt, buf, buf, AES_BLOCK_SIZE * blocks, &key, + block, AES_ENCRYPT); + } +#endif + + CHECK_ABI(aes_nohw_set_decrypt_key, kKey, bits, &key); + CHECK_ABI(aes_nohw_decrypt, block, block, &key); +#if defined(AES_NOHW_CBC) + for (size_t blocks : block_counts) { + SCOPED_TRACE(blocks); + CHECK_ABI(aes_nohw_cbc_encrypt, buf, buf, AES_BLOCK_SIZE * blocks, &key, + block, AES_DECRYPT); + } +#endif + + if (bsaes_capable()) { + aes_nohw_set_encrypt_key(kKey, bits, &key); + for (size_t blocks : block_counts) { + SCOPED_TRACE(blocks); + if (blocks != 0) { + CHECK_ABI(bsaes_ctr32_encrypt_blocks, buf, buf, blocks, &key, block); + } + } + + aes_nohw_set_decrypt_key(kKey, bits, &key); + for (size_t blocks : block_counts) { + SCOPED_TRACE(blocks); + CHECK_ABI(bsaes_cbc_encrypt, buf, buf, AES_BLOCK_SIZE * blocks, &key, + block, AES_DECRYPT); + } + } + + if (vpaes_capable()) { + CHECK_ABI(vpaes_set_encrypt_key, kKey, bits, &key); + CHECK_ABI(vpaes_encrypt, block, block, &key); + for (size_t blocks : block_counts) { + SCOPED_TRACE(blocks); + CHECK_ABI(vpaes_cbc_encrypt, buf, buf, AES_BLOCK_SIZE * blocks, &key, + block, AES_ENCRYPT); + } + + CHECK_ABI(vpaes_set_decrypt_key, kKey, bits, &key); + CHECK_ABI(vpaes_decrypt, block, block, &key); + for (size_t blocks : block_counts) { + SCOPED_TRACE(blocks); + CHECK_ABI(vpaes_cbc_encrypt, buf, buf, AES_BLOCK_SIZE * blocks, &key, + block, AES_DECRYPT); + } + } + + if (hwaes_capable()) { + CHECK_ABI(aes_hw_set_encrypt_key, kKey, bits, &key); + CHECK_ABI(aes_hw_encrypt, block, block, &key); + for (size_t blocks : block_counts) { + SCOPED_TRACE(blocks); + CHECK_ABI(aes_hw_cbc_encrypt, buf, buf, AES_BLOCK_SIZE * blocks, &key, + block, AES_ENCRYPT); + CHECK_ABI(aes_hw_ctr32_encrypt_blocks, buf, buf, blocks, &key, block); +#if defined(HWAES_ECB) + CHECK_ABI(aes_hw_ecb_encrypt, buf, buf, AES_BLOCK_SIZE * blocks, &key, + AES_ENCRYPT); +#endif + } + + CHECK_ABI(aes_hw_set_decrypt_key, kKey, bits, &key); + CHECK_ABI(aes_hw_decrypt, block, block, &key); + for (size_t blocks : block_counts) { + SCOPED_TRACE(blocks); + CHECK_ABI(aes_hw_cbc_encrypt, buf, buf, AES_BLOCK_SIZE * blocks, &key, + block, AES_DECRYPT); +#if defined(HWAES_ECB) + CHECK_ABI(aes_hw_ecb_encrypt, buf, buf, AES_BLOCK_SIZE * blocks, &key, + AES_DECRYPT); +#endif + } + } + } +} +#endif // SUPPORTS_ABI_TEST
diff --git a/crypto/fipsmodule/aes/asm/aes-x86_64.pl b/crypto/fipsmodule/aes/asm/aes-x86_64.pl index 57c4e08..ea8b9a4 100755 --- a/crypto/fipsmodule/aes/asm/aes-x86_64.pl +++ b/crypto/fipsmodule/aes/asm/aes-x86_64.pl
@@ -554,6 +554,7 @@ .type _x86_64_AES_encrypt_compact,\@abi-omnipotent .align 16 _x86_64_AES_encrypt_compact: +.cfi_startproc lea 128($sbox),$inp # size optimization mov 0-128($inp),$acc1 # prefetch Te4 mov 32-128($inp),$acc2 @@ -587,6 +588,7 @@ xor 8($key),$s2 xor 12($key),$s3 .byte 0xf3,0xc3 # rep ret +.cfi_endproc .size _x86_64_AES_encrypt_compact,.-_x86_64_AES_encrypt_compact ___ @@ -1159,6 +1161,7 @@ .type _x86_64_AES_decrypt_compact,\@abi-omnipotent .align 16 _x86_64_AES_decrypt_compact: +.cfi_startproc lea 128($sbox),$inp # size optimization mov 0-128($inp),$acc1 # prefetch Td4 mov 32-128($inp),$acc2 @@ -1201,6 +1204,7 @@ xor 8($key),$s2 xor 12($key),$s3 .byte 0xf3,0xc3 # rep ret +.cfi_endproc .size _x86_64_AES_decrypt_compact,.-_x86_64_AES_decrypt_compact ___ @@ -1360,6 +1364,7 @@ .type _x86_64_AES_set_encrypt_key,\@abi-omnipotent .align 16 _x86_64_AES_set_encrypt_key: +.cfi_startproc mov %esi,%ecx # %ecx=bits mov %rdi,%rsi # %rsi=userKey mov %rdx,%rdi # %rdi=key @@ -1541,6 +1546,7 @@ mov \$-1,%rax .Lexit: .byte 0xf3,0xc3 # rep ret +.cfi_endproc .size _x86_64_AES_set_encrypt_key,.-_x86_64_AES_set_encrypt_key ___ @@ -1720,7 +1726,9 @@ cmp \$0,%rdx # check length je .Lcbc_epilogue pushfq -.cfi_push 49 # %rflags +# This could be .cfi_push 49, but libunwind fails on registers it does not +# recognize. See https://bugzilla.redhat.com/show_bug.cgi?id=217087. +.cfi_adjust_cfa_offset 8 push %rbx .cfi_push %rbx push %rbp @@ -1743,6 +1751,7 @@ cmp \$0,%r9 cmoveq %r10,$sbox +.cfi_remember_state leaq OPENSSL_ia32cap_P(%rip),%r10 mov (%r10), %r10d cmp \$$speed_limit,%rdx @@ -1979,6 +1988,7 @@ #--------------------------- SLOW ROUTINE ---------------------------# .align 16 .Lcbc_slow_prologue: +.cfi_restore_state # allocate aligned stack frame... lea -88(%rsp),%rbp and \$-64,%rbp @@ -1990,8 +2000,10 @@ sub %r10,%rbp xchg %rsp,%rbp +.cfi_def_cfa_register %rbp #add \$8,%rsp # reserve for return address! mov %rbp,$_rsp # save %rsp +.cfi_cfa_expression $_rsp,deref,+64 .Lcbc_slow_body: #mov %rdi,$_inp # save copy of inp #mov %rsi,$_out # save copy of out @@ -2180,7 +2192,9 @@ .cfi_def_cfa %rsp,16 .Lcbc_popfq: popfq -.cfi_pop 49 # %rflags +# This could be .cfi_pop 49, but libunwind fails on registers it does not +# recognize. See https://bugzilla.redhat.com/show_bug.cgi?id=217087. +.cfi_adjust_cfa_offset -8 .Lcbc_epilogue: ret .cfi_endproc
diff --git a/crypto/fipsmodule/aes/asm/aesni-x86_64.pl b/crypto/fipsmodule/aes/asm/aesni-x86_64.pl index 6545274..14175ad 100644 --- a/crypto/fipsmodule/aes/asm/aesni-x86_64.pl +++ b/crypto/fipsmodule/aes/asm/aesni-x86_64.pl
@@ -274,6 +274,7 @@ .type ${PREFIX}_encrypt,\@abi-omnipotent .align 16 ${PREFIX}_encrypt: +.cfi_startproc movups ($inp),$inout0 # load input mov 240($key),$rounds # key->rounds ___ @@ -284,12 +285,14 @@ movups $inout0,($out) # output pxor $inout0,$inout0 ret +.cfi_endproc .size ${PREFIX}_encrypt,.-${PREFIX}_encrypt .globl ${PREFIX}_decrypt .type ${PREFIX}_decrypt,\@abi-omnipotent .align 16 ${PREFIX}_decrypt: +.cfi_startproc movups ($inp),$inout0 # load input mov 240($key),$rounds # key->rounds ___ @@ -300,6 +303,7 @@ movups $inout0,($out) # output pxor $inout0,$inout0 ret +.cfi_endproc .size ${PREFIX}_decrypt, .-${PREFIX}_decrypt ___ } @@ -325,6 +329,7 @@ .type _aesni_${dir}rypt2,\@abi-omnipotent .align 16 _aesni_${dir}rypt2: +.cfi_startproc $movkey ($key),$rndkey0 shl \$4,$rounds $movkey 16($key),$rndkey1 @@ -350,6 +355,7 @@ aes${dir}last $rndkey0,$inout0 aes${dir}last $rndkey0,$inout1 ret +.cfi_endproc .size _aesni_${dir}rypt2,.-_aesni_${dir}rypt2 ___ } @@ -361,6 +367,7 @@ .type _aesni_${dir}rypt3,\@abi-omnipotent .align 16 _aesni_${dir}rypt3: +.cfi_startproc $movkey ($key),$rndkey0 shl \$4,$rounds $movkey 16($key),$rndkey1 @@ -391,6 +398,7 @@ aes${dir}last $rndkey0,$inout1 aes${dir}last $rndkey0,$inout2 ret +.cfi_endproc .size _aesni_${dir}rypt3,.-_aesni_${dir}rypt3 ___ } @@ -406,6 +414,7 @@ .type _aesni_${dir}rypt4,\@abi-omnipotent .align 16 _aesni_${dir}rypt4: +.cfi_startproc $movkey ($key),$rndkey0 shl \$4,$rounds $movkey 16($key),$rndkey1 @@ -442,6 +451,7 @@ aes${dir}last $rndkey0,$inout2 aes${dir}last $rndkey0,$inout3 ret +.cfi_endproc .size _aesni_${dir}rypt4,.-_aesni_${dir}rypt4 ___ } @@ -453,6 +463,7 @@ .type _aesni_${dir}rypt6,\@abi-omnipotent .align 16 _aesni_${dir}rypt6: +.cfi_startproc $movkey ($key),$rndkey0 shl \$4,$rounds $movkey 16($key),$rndkey1 @@ -503,6 +514,7 @@ aes${dir}last $rndkey0,$inout4 aes${dir}last $rndkey0,$inout5 ret +.cfi_endproc .size _aesni_${dir}rypt6,.-_aesni_${dir}rypt6 ___ } @@ -514,6 +526,7 @@ .type _aesni_${dir}rypt8,\@abi-omnipotent .align 16 _aesni_${dir}rypt8: +.cfi_startproc $movkey ($key),$rndkey0 shl \$4,$rounds $movkey 16($key),$rndkey1 @@ -574,6 +587,7 @@ aes${dir}last $rndkey0,$inout6 aes${dir}last $rndkey0,$inout7 ret +.cfi_endproc .size _aesni_${dir}rypt8,.-_aesni_${dir}rypt8 ___ } @@ -598,6 +612,7 @@ .type ${PREFIX}_ecb_encrypt,\@function,5 .align 16 ${PREFIX}_ecb_encrypt: +.cfi_startproc ___ $code.=<<___ if ($win64); lea -0x58(%rsp),%rsp @@ -943,6 +958,7 @@ ___ $code.=<<___; ret +.cfi_endproc .size ${PREFIX}_ecb_encrypt,.-${PREFIX}_ecb_encrypt ___
diff --git a/crypto/fipsmodule/aes/asm/bsaes-x86_64.pl b/crypto/fipsmodule/aes/asm/bsaes-x86_64.pl index c0ade37..f0031ef 100644 --- a/crypto/fipsmodule/aes/asm/bsaes-x86_64.pl +++ b/crypto/fipsmodule/aes/asm/bsaes-x86_64.pl
@@ -817,6 +817,7 @@ .type _bsaes_encrypt8,\@abi-omnipotent .align 64 _bsaes_encrypt8: +.cfi_startproc lea .LBS0(%rip), $const # constants table movdqa ($key), @XMM[9] # round 0 key @@ -876,11 +877,13 @@ pxor @XMM[8], @XMM[0] pxor @XMM[8], @XMM[1] ret +.cfi_endproc .size _bsaes_encrypt8,.-_bsaes_encrypt8 .type _bsaes_decrypt8,\@abi-omnipotent .align 64 _bsaes_decrypt8: +.cfi_startproc lea .LBS0(%rip), $const # constants table movdqa ($key), @XMM[9] # round 0 key @@ -938,6 +941,7 @@ pxor @XMM[8], @XMM[0] pxor @XMM[8], @XMM[1] ret +.cfi_endproc .size _bsaes_decrypt8,.-_bsaes_decrypt8 ___ } @@ -972,6 +976,7 @@ .type _bsaes_key_convert,\@abi-omnipotent .align 16 _bsaes_key_convert: +.cfi_startproc lea .Lmasks(%rip), $const movdqu ($inp), %xmm7 # load round 0 key lea 0x10($inp), $inp @@ -1050,6 +1055,7 @@ movdqa 0x50($const), %xmm7 # .L63 #movdqa %xmm6, ($out) # don't save last round key ret +.cfi_endproc .size _bsaes_key_convert,.-_bsaes_key_convert ___ }
diff --git a/crypto/fipsmodule/aes/asm/vpaes-x86_64.pl b/crypto/fipsmodule/aes/asm/vpaes-x86_64.pl index a583ca4..3d4770c 100644 --- a/crypto/fipsmodule/aes/asm/vpaes-x86_64.pl +++ b/crypto/fipsmodule/aes/asm/vpaes-x86_64.pl
@@ -91,6 +91,7 @@ .type _vpaes_encrypt_core,\@abi-omnipotent .align 16 _vpaes_encrypt_core: +.cfi_startproc mov %rdx, %r9 mov \$16, %r11 mov 240(%rdx),%eax @@ -171,6 +172,7 @@ pxor %xmm4, %xmm0 # 0 = A pshufb %xmm1, %xmm0 ret +.cfi_endproc .size _vpaes_encrypt_core,.-_vpaes_encrypt_core ## @@ -181,6 +183,7 @@ .type _vpaes_decrypt_core,\@abi-omnipotent .align 16 _vpaes_decrypt_core: +.cfi_startproc mov %rdx, %r9 # load key mov 240(%rdx),%eax movdqa %xmm9, %xmm1 @@ -277,6 +280,7 @@ pxor %xmm4, %xmm0 # 0 = A pshufb %xmm2, %xmm0 ret +.cfi_endproc .size _vpaes_decrypt_core,.-_vpaes_decrypt_core ######################################################## @@ -287,6 +291,7 @@ .type _vpaes_schedule_core,\@abi-omnipotent .align 16 _vpaes_schedule_core: +.cfi_startproc # rdi = key # rsi = size in bits # rdx = buffer @@ -453,6 +458,7 @@ pxor %xmm6, %xmm6 pxor %xmm7, %xmm7 ret +.cfi_endproc .size _vpaes_schedule_core,.-_vpaes_schedule_core ## @@ -472,6 +478,7 @@ .type _vpaes_schedule_192_smear,\@abi-omnipotent .align 16 _vpaes_schedule_192_smear: +.cfi_startproc pshufd \$0x80, %xmm6, %xmm1 # d c 0 0 -> c 0 0 0 pshufd \$0xFE, %xmm7, %xmm0 # b a _ _ -> b b b a pxor %xmm1, %xmm6 # -> c+d c 0 0 @@ -480,6 +487,7 @@ movdqa %xmm6, %xmm0 movhlps %xmm1, %xmm6 # clobber low side with zeros ret +.cfi_endproc .size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear ## @@ -503,6 +511,7 @@ .type _vpaes_schedule_round,\@abi-omnipotent .align 16 _vpaes_schedule_round: +.cfi_startproc # extract rcon from xmm8 pxor %xmm1, %xmm1 palignr \$15, %xmm8, %xmm1 @@ -556,6 +565,7 @@ pxor %xmm7, %xmm0 movdqa %xmm0, %xmm7 ret +.cfi_endproc .size _vpaes_schedule_round,.-_vpaes_schedule_round ## @@ -570,6 +580,7 @@ .type _vpaes_schedule_transform,\@abi-omnipotent .align 16 _vpaes_schedule_transform: +.cfi_startproc movdqa %xmm9, %xmm1 pandn %xmm0, %xmm1 psrld \$4, %xmm1 @@ -580,6 +591,7 @@ pshufb %xmm1, %xmm0 pxor %xmm2, %xmm0 ret +.cfi_endproc .size _vpaes_schedule_transform,.-_vpaes_schedule_transform ## @@ -608,6 +620,7 @@ .type _vpaes_schedule_mangle,\@abi-omnipotent .align 16 _vpaes_schedule_mangle: +.cfi_startproc movdqa %xmm0, %xmm4 # save xmm0 for later movdqa .Lk_mc_forward(%rip),%xmm5 test %rcx, %rcx @@ -672,6 +685,7 @@ and \$0x30, %r8 movdqu %xmm3, (%rdx) ret +.cfi_endproc .size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle # @@ -681,6 +695,7 @@ .type ${PREFIX}_set_encrypt_key,\@function,3 .align 16 ${PREFIX}_set_encrypt_key: +.cfi_startproc ___ $code.=<<___ if ($win64); lea -0xb8(%rsp),%rsp @@ -723,12 +738,14 @@ $code.=<<___; xor %eax,%eax ret +.cfi_endproc .size ${PREFIX}_set_encrypt_key,.-${PREFIX}_set_encrypt_key .globl ${PREFIX}_set_decrypt_key .type ${PREFIX}_set_decrypt_key,\@function,3 .align 16 ${PREFIX}_set_decrypt_key: +.cfi_startproc ___ $code.=<<___ if ($win64); lea -0xb8(%rsp),%rsp @@ -776,12 +793,14 @@ $code.=<<___; xor %eax,%eax ret +.cfi_endproc .size ${PREFIX}_set_decrypt_key,.-${PREFIX}_set_decrypt_key .globl ${PREFIX}_encrypt .type ${PREFIX}_encrypt,\@function,3 .align 16 ${PREFIX}_encrypt: +.cfi_startproc ___ $code.=<<___ if ($win64); lea -0xb8(%rsp),%rsp @@ -819,12 +838,14 @@ ___ $code.=<<___; ret +.cfi_endproc .size ${PREFIX}_encrypt,.-${PREFIX}_encrypt .globl ${PREFIX}_decrypt .type ${PREFIX}_decrypt,\@function,3 .align 16 ${PREFIX}_decrypt: +.cfi_startproc ___ $code.=<<___ if ($win64); lea -0xb8(%rsp),%rsp @@ -862,6 +883,7 @@ ___ $code.=<<___; ret +.cfi_endproc .size ${PREFIX}_decrypt,.-${PREFIX}_decrypt ___ { @@ -874,6 +896,7 @@ .type ${PREFIX}_cbc_encrypt,\@function,6 .align 16 ${PREFIX}_cbc_encrypt: +.cfi_startproc xchg $key,$len ___ ($len,$key)=($key,$len); @@ -944,6 +967,7 @@ $code.=<<___; .Lcbc_abort: ret +.cfi_endproc .size ${PREFIX}_cbc_encrypt,.-${PREFIX}_cbc_encrypt ___ } @@ -957,6 +981,7 @@ .type _vpaes_preheat,\@abi-omnipotent .align 16 _vpaes_preheat: +.cfi_startproc lea .Lk_s0F(%rip), %r10 movdqa -0x20(%r10), %xmm10 # .Lk_inv movdqa -0x10(%r10), %xmm11 # .Lk_inv+16 @@ -966,6 +991,7 @@ movdqa 0x50(%r10), %xmm15 # .Lk_sb2 movdqa 0x60(%r10), %xmm14 # .Lk_sb2+16 ret +.cfi_endproc .size _vpaes_preheat,.-_vpaes_preheat ######################################################## ## ##
diff --git a/crypto/perlasm/x86_64-xlate.pl b/crypto/perlasm/x86_64-xlate.pl index d896c53..213093d 100755 --- a/crypto/perlasm/x86_64-xlate.pl +++ b/crypto/perlasm/x86_64-xlate.pl
@@ -539,6 +539,7 @@ ); my ($cfa_reg, $cfa_rsp); + my @cfa_stack; # [us]leb128 format is variable-length integer representation base # 2^128, with most significant bit of each byte being 0 denoting @@ -686,6 +687,14 @@ cfa_expression($$line))); last; }; + /remember_state/ + && do { push @cfa_stack, [$cfa_reg, $cfa_rsp]; + last; + }; + /restore_state/ + && do { ($cfa_reg, $cfa_rsp) = @{pop @cfa_stack}; + last; + }; } $self->{value} = ".cfi_$dir\t$$line" if ($dir);