Patch out the aes_nohw fallback in bsaes_cbc_encrypt. This plugs all bsaes fallback leaks for CBC outside of the key schedule. The CBC EVP_CIPHERs never call the block function directly when there's a stream.cbc function available. This affects CBC decryptions of length < 128 or 16 mod 128. Performance-wise, we don't really care about CBC apart from passing glances at its use in TLS. There, the Lucky13 workaround mutes the effects. Cortex-A53 (Raspberry Pi 3 Model B+) Before: Did 78000 AES-128-CBC-SHA1 (16 bytes) open operations in 3020254us (25825.6 ops/sec): 0.4 MB/s Did 75000 AES-128-CBC-SHA1 (32 bytes) open operations in 3005760us (24952.1 ops/sec): 0.8 MB/s Did 71000 AES-128-CBC-SHA1 (64 bytes) open operations in 3038137us (23369.6 ops/sec): 1.5 MB/s Did 67000 AES-128-CBC-SHA1 (96 bytes) open operations in 3027686us (22129.1 ops/sec): 2.1 MB/s Did 64000 AES-128-CBC-SHA1 (112 bytes) open operations in 3005491us (21294.4 ops/sec): 2.4 MB/s Did 59000 AES-128-CBC-SHA1 (128 bytes) open operations in 3020083us (19535.9 ops/sec): 2.5 MB/s Did 53000 AES-128-CBC-SHA1 (240 bytes) open operations in 3020105us (17549.1 ops/sec): 4.2 MB/s After: Did 71668 AES-128-CBC-SHA1 (16 bytes) open operations in 3020896us (23724.1 ops/sec): 0.4 MB/s Did 71000 AES-128-CBC-SHA1 (32 bytes) open operations in 3040826us (23348.9 ops/sec): 0.7 MB/s Did 68000 AES-128-CBC-SHA1 (64 bytes) open operations in 3009913us (22592.0 ops/sec): 1.4 MB/s Did 66000 AES-128-CBC-SHA1 (96 bytes) open operations in 3007597us (21944.4 ops/sec): 2.1 MB/s Did 59000 AES-128-CBC-SHA1 (112 bytes) open operations in 3002878us (19647.8 ops/sec): 2.2 MB/s Did 59000 AES-128-CBC-SHA1 (128 bytes) open operations in 3046786us (19364.7 ops/sec): 2.5 MB/s Did 50000 AES-128-CBC-SHA1 (240 bytes) open operations in 3043643us (16427.7 ops/sec): 3.9 MB/s Penryn (Mac mini, mid 2010) Before: Did 152000 AES-128-CBC-SHA1 (16 bytes) open operations in 1004422us (151330.8 ops/sec): 2.4 MB/s Did 143000 AES-128-CBC-SHA1 (32 bytes) open operations in 1000443us (142936.7 ops/sec): 4.6 MB/s Did 136000 AES-128-CBC-SHA1 (48 bytes) open operations in 1006580us (135111.0 ops/sec): 6.5 MB/s Did 146000 AES-128-CBC-SHA1 (96 bytes) open operations in 1005731us (145168.0 ops/sec): 13.9 MB/s Did 138000 AES-128-CBC-SHA1 (112 bytes) open operations in 1003330us (137542.0 ops/sec): 15.4 MB/s Did 133000 AES-128-CBC-SHA1 (128 bytes) open operations in 1005876us (132223.1 ops/sec): 16.9 MB/s Did 117000 AES-128-CBC-SHA1 (240 bytes) open operations in 1004922us (116426.9 ops/sec): 27.9 MB/s After: Did 159000 AES-128-CBC-SHA1 (16 bytes) open operations in 1000505us (158919.7 ops/sec): 2.5 MB/s Did 157000 AES-128-CBC-SHA1 (32 bytes) open operations in 1006091us (156049.5 ops/sec): 5.0 MB/s Did 154000 AES-128-CBC-SHA1 (48 bytes) open operations in 1002720us (153582.3 ops/sec): 7.4 MB/s Did 146000 AES-128-CBC-SHA1 (96 bytes) open operations in 1002567us (145626.2 ops/sec): 14.0 MB/s Did 135000 AES-128-CBC-SHA1 (112 bytes) open operations in 1001212us (134836.6 ops/sec): 15.1 MB/s Did 133000 AES-128-CBC-SHA1 (128 bytes) open operations in 1006441us (132148.8 ops/sec): 16.9 MB/s Did 115000 AES-128-CBC-SHA1 (240 bytes) open operations in 1005246us (114399.9 ops/sec): 27.5 MB/s Bug: 256 Change-Id: I864b4455ada0d4d245380fce6f869dabb0686354 Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/35167 Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/fipsmodule/aes/asm/bsaes-armv7.pl b/crypto/fipsmodule/aes/asm/bsaes-armv7.pl index c6e0b17..d4db3b4 100644 --- a/crypto/fipsmodule/aes/asm/bsaes-armv7.pl +++ b/crypto/fipsmodule/aes/asm/bsaes-armv7.pl
@@ -1113,26 +1113,12 @@ my ($keysched)=("sp"); $code.=<<___; -@ TODO(davidben): This should be aes_nohw_cbc_encrypt, but that function does -@ not exist. Rather than add it, patch this fallback out. See -@ https://crbug.com/boringssl/256. -.extern AES_cbc_encrypt -.extern aes_nohw_decrypt - .global bsaes_cbc_encrypt .type bsaes_cbc_encrypt,%function .align 5 bsaes_cbc_encrypt: -#ifndef __KERNEL__ - cmp $len, #128 -#ifndef __thumb__ - blo AES_cbc_encrypt -#else - bhs 1f - b AES_cbc_encrypt -1: -#endif -#endif + @ In OpenSSL, this function had a fallback to aes_nohw_cbc_encrypt for + @ short inputs. We patch this out, using bsaes for all input sizes. @ it is up to the caller to make sure we are called with enc == 0 @@ -1230,10 +1216,7 @@ adds $len, $len, #8 beq .Lcbc_dec_done - vld1.8 {@XMM[0]}, [$inp]! @ load input - cmp $len, #2 - blo .Lcbc_dec_one - vld1.8 {@XMM[1]}, [$inp]! + @ Set up most parameters for the _bsaes_decrypt8 call. #ifndef BSAES_ASM_EXTENDED_KEY mov r4, $keysched @ pass the key #else @@ -1241,6 +1224,11 @@ #endif mov r5, $rounds vstmia $fp, {@XMM[15]} @ put aside IV + + vld1.8 {@XMM[0]}, [$inp]! @ load input + cmp $len, #2 + blo .Lcbc_dec_one + vld1.8 {@XMM[1]}, [$inp]! beq .Lcbc_dec_two vld1.8 {@XMM[2]}, [$inp]! cmp $len, #4 @@ -1358,16 +1346,11 @@ .align 4 .Lcbc_dec_one: sub $inp, $inp, #0x10 - mov $rounds, $out @ save original out pointer - mov $out, $fp @ use the iv scratch space as out buffer - mov r2, $key - vmov @XMM[4],@XMM[15] @ just in case ensure that IV - vmov @XMM[5],@XMM[0] @ and input are preserved - bl aes_nohw_decrypt - vld1.8 {@XMM[0]}, [$fp] @ load result - veor @XMM[0], @XMM[0], @XMM[4] @ ^= IV - vmov @XMM[15], @XMM[5] @ @XMM[5] holds input - vst1.8 {@XMM[0]}, [$rounds] @ write output + bl _bsaes_decrypt8 + vldmia $fp, {@XMM[14]} @ reload IV + vld1.8 {@XMM[15]}, [$inp]! @ reload input + veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV + vst1.8 {@XMM[0]}, [$out]! @ write output .Lcbc_dec_done: #ifndef BSAES_ASM_EXTENDED_KEY
diff --git a/crypto/fipsmodule/aes/asm/bsaes-x86_64.pl b/crypto/fipsmodule/aes/asm/bsaes-x86_64.pl index 899490f..3bb2819 100644 --- a/crypto/fipsmodule/aes/asm/bsaes-x86_64.pl +++ b/crypto/fipsmodule/aes/asm/bsaes-x86_64.pl
@@ -811,8 +811,6 @@ $code.=<<___; .text -.extern aes_nohw_decrypt - .type _bsaes_encrypt8,\@abi-omnipotent .align 64 _bsaes_encrypt8: @@ -1608,22 +1606,14 @@ ___ } $code.=<<___; -.extern aes_nohw_cbc_encrypt .globl bsaes_cbc_encrypt .type bsaes_cbc_encrypt,\@abi-omnipotent .align 16 bsaes_cbc_encrypt: .cfi_startproc -___ -$code.=<<___ if ($win64); - mov 48(%rsp),$arg6 # pull direction flag -___ -$code.=<<___; - cmp \$0,$arg6 - jne aes_nohw_cbc_encrypt - cmp \$128,$arg3 - jb aes_nohw_cbc_encrypt - + # In OpenSSL, this function had a fallback to aes_nohw_cbc_encrypt for + # short inputs or if enc is one. We patch this out, using bsaes for all + # input sizes. The caller is required to ensure enc is zero. mov %rsp, %rax .Lcbc_dec_prologue: push %rbp @@ -1682,6 +1672,8 @@ movdqu (%rbx), @XMM[15] # load IV sub \$8,$len + jc .Lcbc_dec_loop_done + .Lcbc_dec_loop: movdqu 0x00($inp), @XMM[0] # load input movdqu 0x10($inp), @XMM[1] @@ -1726,6 +1718,7 @@ sub \$8,$len jnc .Lcbc_dec_loop +.Lcbc_dec_loop_done: add \$8,$len jz .Lcbc_dec_done @@ -1858,13 +1851,12 @@ jmp .Lcbc_dec_done .align 16 .Lcbc_dec_one: - lea ($inp), $arg1 - lea 0x20(%rbp), $arg2 # buffer output - lea ($key), $arg3 - call aes_nohw_decrypt # doesn't touch %xmm - pxor 0x20(%rbp), @XMM[15] # ^= IV - movdqu @XMM[15], ($out) # write output - movdqa @XMM[0], @XMM[15] # IV + movdqa @XMM[15], 0x20(%rbp) # put aside IV + call _bsaes_decrypt8 + pxor 0x20(%rbp), @XMM[0] # ^= IV + movdqu 0x00($inp), @XMM[15] # IV + movdqu @XMM[0], 0x00($out) # write output + jmp .Lcbc_dec_done .Lcbc_dec_done: movdqu @XMM[15], (%rbx) # return IV
diff --git a/crypto/fipsmodule/aes/internal.h b/crypto/fipsmodule/aes/internal.h index a05abcb..63070bc 100644 --- a/crypto/fipsmodule/aes/internal.h +++ b/crypto/fipsmodule/aes/internal.h
@@ -133,7 +133,7 @@ #if defined(BSAES) // On platforms where BSAES gets defined (just above), then these functions are -// provided by asm. +// provided by asm. Note |bsaes_cbc_encrypt| requires |enc| to be zero. void bsaes_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t length, const AES_KEY *key, uint8_t ivec[16], int enc); void bsaes_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out, size_t len,