Add Intel Indirect Branch Tracking support.
This allows operating systems to insist on IBT
enforcement as an exploit mitigation mechanism without
needing to make an exception for anything using a
bundled boringssl, such as chrome, mono, and qtwebengine.
Change-Id: Iac28dd3d2af177b89ffde10ae97bce23739feb94
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/60625
Reviewed-by: Adam Langley <agl@google.com>
Commit-Queue: Bob Beck <bbe@google.com>
Reviewed-by: David Benjamin <davidben@google.com>
diff --git a/crypto/chacha/asm/chacha-x86_64.pl b/crypto/chacha/asm/chacha-x86_64.pl
index 31384de..418044c 100755
--- a/crypto/chacha/asm/chacha-x86_64.pl
+++ b/crypto/chacha/asm/chacha-x86_64.pl
@@ -231,6 +231,7 @@
.align 64
ChaCha20_ctr32:
.cfi_startproc
+ _CET_ENDBR
cmp \$0,$len
je .Lno_data
mov OPENSSL_ia32cap_P+4(%rip),%r10
diff --git a/crypto/cipher_extra/asm/aes128gcmsiv-x86_64.pl b/crypto/cipher_extra/asm/aes128gcmsiv-x86_64.pl
index f7d419b..e044259 100644
--- a/crypto/cipher_extra/asm/aes128gcmsiv-x86_64.pl
+++ b/crypto/cipher_extra/asm/aes128gcmsiv-x86_64.pl
@@ -134,6 +134,7 @@
.align 16
aesgcmsiv_htable_init:
.cfi_startproc
+ _CET_ENDBR
vmovdqa ($H), $T
vmovdqa $T, $TMP0
vmovdqa $T, ($Htbl) # H
@@ -174,6 +175,7 @@
.align 16
aesgcmsiv_htable6_init:
.cfi_startproc
+ _CET_ENDBR
vmovdqa ($H), $T
vmovdqa $T, $TMP0
vmovdqa $T, ($Htbl) # H
@@ -235,6 +237,7 @@
.align 16
aesgcmsiv_htable_polyval:
.cfi_startproc
+ _CET_ENDBR
test $len, $len
jnz .Lhtable_polyval_start
ret
@@ -420,6 +423,7 @@
.align 16
aesgcmsiv_polyval_horner:
.cfi_startproc
+ _CET_ENDBR
test $L, $L
jnz .Lpolyval_horner_start
ret
@@ -460,6 +464,7 @@
.align 16
aes128gcmsiv_aes_ks:
.cfi_startproc
+ _CET_ENDBR
vmovdqu (%rdi), %xmm1 # xmm1 = user key
vmovdqa %xmm1, (%rsi) # rsi points to output
@@ -521,6 +526,7 @@
.align 16
aes256gcmsiv_aes_ks:
.cfi_startproc
+ _CET_ENDBR
vmovdqu (%rdi), %xmm1
vmovdqu 16(%rdi), %xmm3
vmovdqa %xmm1, (%rsi)
@@ -614,6 +620,7 @@
.align 16
aes128gcmsiv_aes_ks_enc_x1:
.cfi_startproc
+ _CET_ENDBR
vmovdqa (%rcx), %xmm1 # xmm1 = first 16 bytes of random key
vmovdqa 0*16(%rdi), $BLOCK1
@@ -687,6 +694,7 @@
.align 16
aes128gcmsiv_kdf:
.cfi_startproc
+ _CET_ENDBR
# parameter 1: %rdi Pointer to NONCE
# parameter 2: %rsi Pointer to CT
# parameter 4: %rdx Pointer to keys
@@ -787,6 +795,7 @@
.align 16
aes128gcmsiv_enc_msg_x4:
.cfi_startproc
+ _CET_ENDBR
test $LEN, $LEN
jnz .L128_enc_msg_x4_start
ret
@@ -984,6 +993,7 @@
.align 16
aes128gcmsiv_enc_msg_x8:
.cfi_startproc
+ _CET_ENDBR
test $LEN, $LEN
jnz .L128_enc_msg_x8_start
ret
@@ -1239,6 +1249,7 @@
$code.=<<___;
.cfi_startproc
+ _CET_ENDBR
test \$~15, $LEN
jnz .L${labelPrefix}_dec_start
ret
@@ -1578,6 +1589,7 @@
.align 16
aes128gcmsiv_ecb_enc_block:
.cfi_startproc
+ _CET_ENDBR
vmovdqa (%rdi), $STATE_1
vpxor ($KSp), $STATE_1, $STATE_1
@@ -1670,6 +1682,7 @@
.align 16
aes256gcmsiv_aes_ks_enc_x1:
.cfi_startproc
+ _CET_ENDBR
vmovdqa con1(%rip), $CON_MASK # CON_MASK = 1,1,1,1
vmovdqa mask(%rip), $MASK_256 # MASK_256
vmovdqa ($PT), $BLOCK1
@@ -1711,6 +1724,7 @@
.align 16
aes256gcmsiv_ecb_enc_block:
.cfi_startproc
+ _CET_ENDBR
vmovdqa (%rdi), $STATE_1
vpxor ($KSp), $STATE_1, $STATE_1
vaesenc 1*16($KSp), $STATE_1, $STATE_1
@@ -1794,6 +1808,7 @@
.align 16
aes256gcmsiv_enc_msg_x4:
.cfi_startproc
+ _CET_ENDBR
test $LEN, $LEN
jnz .L256_enc_msg_x4_start
ret
@@ -1994,6 +2009,7 @@
.align 16
aes256gcmsiv_enc_msg_x8:
.cfi_startproc
+ _CET_ENDBR
test $LEN, $LEN
jnz .L256_enc_msg_x8_start
ret
@@ -2200,6 +2216,7 @@
.align 16
aes256gcmsiv_kdf:
.cfi_startproc
+ _CET_ENDBR
# parameter 1: %rdi Pointer to NONCE
# parameter 2: %rsi Pointer to CT
# parameter 4: %rdx Pointer to keys
diff --git a/crypto/cipher_extra/asm/chacha20_poly1305_x86_64.pl b/crypto/cipher_extra/asm/chacha20_poly1305_x86_64.pl
index f0430c3..fb11760 100644
--- a/crypto/cipher_extra/asm/chacha20_poly1305_x86_64.pl
+++ b/crypto/cipher_extra/asm/chacha20_poly1305_x86_64.pl
@@ -449,6 +449,7 @@
.align 64
chacha20_poly1305_open:
.cfi_startproc
+ _CET_ENDBR
push %rbp
.cfi_push %rbp
push %rbx
@@ -871,6 +872,7 @@
.align 64
chacha20_poly1305_seal:
.cfi_startproc
+ _CET_ENDBR
push %rbp
.cfi_push %rbp
push %rbx
diff --git a/crypto/fipsmodule/aes/asm/aesni-x86_64.pl b/crypto/fipsmodule/aes/asm/aesni-x86_64.pl
index 215611f..414b76b 100644
--- a/crypto/fipsmodule/aes/asm/aesni-x86_64.pl
+++ b/crypto/fipsmodule/aes/asm/aesni-x86_64.pl
@@ -275,6 +275,7 @@
.align 16
${PREFIX}_encrypt:
.cfi_startproc
+ _CET_ENDBR
#ifdef BORINGSSL_DISPATCH_TEST
.extern BORINGSSL_function_hit
movb \$1,BORINGSSL_function_hit+1(%rip)
@@ -297,6 +298,7 @@
.align 16
${PREFIX}_decrypt:
.cfi_startproc
+ _CET_ENDBR
movups ($inp),$inout0 # load input
mov 240($key),$rounds # key->rounds
___
@@ -617,6 +619,7 @@
.align 16
${PREFIX}_ecb_encrypt:
.cfi_startproc
+ _CET_ENDBR
___
$code.=<<___ if ($win64);
lea -0x58(%rsp),%rsp
@@ -1203,6 +1206,7 @@
.align 16
${PREFIX}_ctr32_encrypt_blocks:
.cfi_startproc
+ _CET_ENDBR
#ifdef BORINGSSL_DISPATCH_TEST
movb \$1,BORINGSSL_function_hit(%rip)
#endif
@@ -1781,6 +1785,7 @@
.align 16
${PREFIX}_xts_encrypt:
.cfi_startproc
+ _CET_ENDBR
lea (%rsp),%r11 # frame pointer
.cfi_def_cfa_register %r11
push %rbp
@@ -2264,6 +2269,7 @@
.align 16
${PREFIX}_xts_decrypt:
.cfi_startproc
+ _CET_ENDBR
lea (%rsp),%r11 # frame pointer
.cfi_def_cfa_register %r11
push %rbp
@@ -2782,6 +2788,7 @@
.align 16
${PREFIX}_cbc_encrypt:
.cfi_startproc
+ _CET_ENDBR
test $len,$len # check length
jz .Lcbc_ret
@@ -3331,6 +3338,7 @@
.align 16
${PREFIX}_set_decrypt_key:
.cfi_startproc
+ _CET_ENDBR
.byte 0x48,0x83,0xEC,0x08 # sub rsp,8
.cfi_adjust_cfa_offset 8
call __aesni_set_encrypt_key
@@ -3403,6 +3411,7 @@
${PREFIX}_set_encrypt_key:
__aesni_set_encrypt_key:
.cfi_startproc
+ _CET_ENDBR
#ifdef BORINGSSL_DISPATCH_TEST
movb \$1,BORINGSSL_function_hit+3(%rip)
#endif
diff --git a/crypto/fipsmodule/aes/asm/vpaes-x86_64.pl b/crypto/fipsmodule/aes/asm/vpaes-x86_64.pl
index 37e93d7..4d0d38f 100644
--- a/crypto/fipsmodule/aes/asm/vpaes-x86_64.pl
+++ b/crypto/fipsmodule/aes/asm/vpaes-x86_64.pl
@@ -871,6 +871,7 @@
.align 16
${PREFIX}_set_encrypt_key:
.cfi_startproc
+ _CET_ENDBR
#ifdef BORINGSSL_DISPATCH_TEST
.extern BORINGSSL_function_hit
movb \$1, BORINGSSL_function_hit+5(%rip)
@@ -926,6 +927,7 @@
.align 16
${PREFIX}_set_decrypt_key:
.cfi_startproc
+ _CET_ENDBR
___
$code.=<<___ if ($win64);
lea -0xb8(%rsp),%rsp
@@ -981,6 +983,7 @@
.align 16
${PREFIX}_encrypt:
.cfi_startproc
+ _CET_ENDBR
#ifdef BORINGSSL_DISPATCH_TEST
.extern BORINGSSL_function_hit
movb \$1, BORINGSSL_function_hit+4(%rip)
@@ -1030,6 +1033,7 @@
.align 16
${PREFIX}_decrypt:
.cfi_startproc
+ _CET_ENDBR
___
$code.=<<___ if ($win64);
lea -0xb8(%rsp),%rsp
@@ -1081,6 +1085,7 @@
.align 16
${PREFIX}_cbc_encrypt:
.cfi_startproc
+ _CET_ENDBR
xchg $key,$len
___
($len,$key)=($key,$len);
@@ -1166,6 +1171,7 @@
.align 16
${PREFIX}_ctr32_encrypt_blocks:
.cfi_startproc
+ _CET_ENDBR
# _vpaes_encrypt_core and _vpaes_encrypt_core_2x expect the key in %rdx.
xchg $key, $blocks
___
diff --git a/crypto/fipsmodule/bn/asm/rsaz-avx2.pl b/crypto/fipsmodule/bn/asm/rsaz-avx2.pl
index 9be0b14..733722c 100755
--- a/crypto/fipsmodule/bn/asm/rsaz-avx2.pl
+++ b/crypto/fipsmodule/bn/asm/rsaz-avx2.pl
@@ -112,6 +112,7 @@
.align 64
rsaz_1024_sqr_avx2: # 702 cycles, 14% faster than rsaz_1024_mul_avx2
.cfi_startproc
+ _CET_ENDBR
lea (%rsp), %rax
.cfi_def_cfa_register %rax
push %rbx
@@ -863,6 +864,7 @@
.align 64
rsaz_1024_mul_avx2:
.cfi_startproc
+ _CET_ENDBR
lea (%rsp), %rax
.cfi_def_cfa_register %rax
push %rbx
@@ -1474,6 +1476,7 @@
.align 32
rsaz_1024_red2norm_avx2:
.cfi_startproc
+ _CET_ENDBR
sub \$-128,$inp # size optimization
xor %rax,%rax
___
@@ -1515,6 +1518,7 @@
.align 32
rsaz_1024_norm2red_avx2:
.cfi_startproc
+ _CET_ENDBR
sub \$-128,$out # size optimization
mov ($inp),@T[0]
mov \$0x1fffffff,%eax
@@ -1559,6 +1563,7 @@
.align 32
rsaz_1024_scatter5_avx2:
.cfi_startproc
+ _CET_ENDBR
vzeroupper
vmovdqu .Lscatter_permd(%rip),%ymm5
shl \$4,$power
@@ -1586,6 +1591,7 @@
.align 32
rsaz_1024_gather5_avx2:
.cfi_startproc
+ _CET_ENDBR
vzeroupper
mov %rsp,%r11
.cfi_def_cfa_register %r11
diff --git a/crypto/fipsmodule/bn/asm/x86_64-mont.pl b/crypto/fipsmodule/bn/asm/x86_64-mont.pl
index 8b34ae3..be4c69b 100755
--- a/crypto/fipsmodule/bn/asm/x86_64-mont.pl
+++ b/crypto/fipsmodule/bn/asm/x86_64-mont.pl
@@ -92,6 +92,7 @@
.align 16
bn_mul_mont:
.cfi_startproc
+ _CET_ENDBR
mov ${num}d,${num}d
mov %rsp,%rax
.cfi_def_cfa_register %rax
diff --git a/crypto/fipsmodule/bn/asm/x86_64-mont5.pl b/crypto/fipsmodule/bn/asm/x86_64-mont5.pl
index 67ffc4a..88d98af 100755
--- a/crypto/fipsmodule/bn/asm/x86_64-mont5.pl
+++ b/crypto/fipsmodule/bn/asm/x86_64-mont5.pl
@@ -79,6 +79,7 @@
.align 64
bn_mul_mont_gather5:
.cfi_startproc
+ _CET_ENDBR
mov ${num}d,${num}d
mov %rsp,%rax
.cfi_def_cfa_register %rax
@@ -1098,6 +1099,7 @@
.align 32
bn_power5:
.cfi_startproc
+ _CET_ENDBR
mov %rsp,%rax
.cfi_def_cfa_register %rax
___
@@ -1240,6 +1242,7 @@
bn_sqr8x_internal:
__bn_sqr8x_internal:
.cfi_startproc
+ _CET_ENDBR
##############################################################
# Squaring part:
#
@@ -2737,6 +2740,7 @@
bn_sqrx8x_internal:
__bn_sqrx8x_internal:
.cfi_startproc
+ _CET_ENDBR
##################################################################
# Squaring part:
#
@@ -3448,6 +3452,7 @@
.align 16
bn_scatter5:
.cfi_startproc
+ _CET_ENDBR
cmp \$0, $num
jz .Lscatter_epilogue
@@ -3478,6 +3483,7 @@
bn_gather5:
.cfi_startproc
.LSEH_begin_bn_gather5: # Win64 thing, but harmless in other cases
+ _CET_ENDBR
# I can't trust assembler to use specific encoding:-(
.byte 0x4c,0x8d,0x14,0x24 #lea (%rsp),%r10
.cfi_def_cfa_register %r10
diff --git a/crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl b/crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl
index 0701996..b6e0384 100755
--- a/crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl
+++ b/crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl
@@ -98,6 +98,7 @@
.align 32
ecp_nistz256_neg:
.cfi_startproc
+ _CET_ENDBR
push %r12
.cfi_push %r12
push %r13
@@ -166,6 +167,7 @@
.align 32
ecp_nistz256_ord_mul_mont:
.cfi_startproc
+ _CET_ENDBR
___
$code.=<<___ if ($addx);
leaq OPENSSL_ia32cap_P(%rip), %rcx
@@ -497,6 +499,7 @@
.align 32
ecp_nistz256_ord_sqr_mont:
.cfi_startproc
+ _CET_ENDBR
___
$code.=<<___ if ($addx);
leaq OPENSSL_ia32cap_P(%rip), %rcx
@@ -1247,6 +1250,7 @@
.align 32
ecp_nistz256_mul_mont:
.cfi_startproc
+ _CET_ENDBR
___
$code.=<<___ if ($addx);
leaq OPENSSL_ia32cap_P(%rip), %rcx
@@ -1549,6 +1553,7 @@
.align 32
ecp_nistz256_sqr_mont:
.cfi_startproc
+ _CET_ENDBR
___
$code.=<<___ if ($addx);
leaq OPENSSL_ia32cap_P(%rip), %rcx
@@ -2098,6 +2103,7 @@
.align 32
ecp_nistz256_select_w5:
.cfi_startproc
+ _CET_ENDBR
___
$code.=<<___ if ($avx>1);
leaq OPENSSL_ia32cap_P(%rip), %rax
@@ -2198,6 +2204,7 @@
.align 32
ecp_nistz256_select_w7:
.cfi_startproc
+ _CET_ENDBR
___
$code.=<<___ if ($avx>1);
leaq OPENSSL_ia32cap_P(%rip), %rax
@@ -2403,6 +2410,7 @@
ecp_nistz256_avx2_select_w7:
.cfi_startproc
.Lavx2_select_w7:
+ _CET_ENDBR
vzeroupper
___
$code.=<<___ if ($win64);
@@ -2514,6 +2522,7 @@
.type ecp_nistz256_avx2_select_w7,\@function,3
.align 32
ecp_nistz256_avx2_select_w7:
+ _CET_ENDBR
.byte 0x0f,0x0b # ud2
ret
.size ecp_nistz256_avx2_select_w7,.-ecp_nistz256_avx2_select_w7
@@ -2718,6 +2727,7 @@
.align 32
ecp_nistz256_point_double:
.cfi_startproc
+ _CET_ENDBR
___
$code.=<<___ if ($addx);
leaq OPENSSL_ia32cap_P(%rip), %rcx
@@ -2970,6 +2980,7 @@
.align 32
ecp_nistz256_point_add:
.cfi_startproc
+ _CET_ENDBR
___
$code.=<<___ if ($addx);
leaq OPENSSL_ia32cap_P(%rip), %rcx
@@ -3368,6 +3379,7 @@
.align 32
ecp_nistz256_point_add_affine:
.cfi_startproc
+ _CET_ENDBR
___
$code.=<<___ if ($addx);
leaq OPENSSL_ia32cap_P(%rip), %rcx
diff --git a/crypto/fipsmodule/ec/asm/p256_beeu-x86_64-asm.pl b/crypto/fipsmodule/ec/asm/p256_beeu-x86_64-asm.pl
index b9ec96d..1600d4c 100644
--- a/crypto/fipsmodule/ec/asm/p256_beeu-x86_64-asm.pl
+++ b/crypto/fipsmodule/ec/asm/p256_beeu-x86_64-asm.pl
@@ -154,6 +154,7 @@
.align 32
beeu_mod_inverse_vartime:
.cfi_startproc
+ _CET_ENDBR
push %rbp
.cfi_push rbp
push %r12
diff --git a/crypto/fipsmodule/md5/asm/md5-x86_64.pl b/crypto/fipsmodule/md5/asm/md5-x86_64.pl
index 4c96bba..655ea09 100644
--- a/crypto/fipsmodule/md5/asm/md5-x86_64.pl
+++ b/crypto/fipsmodule/md5/asm/md5-x86_64.pl
@@ -131,6 +131,7 @@
.type md5_block_asm_data_order,\@function,3
md5_block_asm_data_order:
.cfi_startproc
+ _CET_ENDBR
push %rbp
.cfi_push rbp
push %rbx
diff --git a/crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl b/crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl
index 7c235f8..2530213 100644
--- a/crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl
+++ b/crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl
@@ -442,6 +442,7 @@
aesni_gcm_decrypt:
.cfi_startproc
.seh_startproc
+ _CET_ENDBR
xor %rax,%rax
# We call |_aesni_ctr32_ghash_6x|, which requires at least 96 (0x60)
@@ -715,6 +716,7 @@
aesni_gcm_encrypt:
.cfi_startproc
.seh_startproc
+ _CET_ENDBR
#ifdef BORINGSSL_DISPATCH_TEST
.extern BORINGSSL_function_hit
movb \$1,BORINGSSL_function_hit+2(%rip)
@@ -1089,6 +1091,7 @@
.globl aesni_gcm_encrypt
.type aesni_gcm_encrypt,\@abi-omnipotent
aesni_gcm_encrypt:
+ _CET_ENDBR
xor %eax,%eax
ret
.size aesni_gcm_encrypt,.-aesni_gcm_encrypt
@@ -1096,6 +1099,7 @@
.globl aesni_gcm_decrypt
.type aesni_gcm_decrypt,\@abi-omnipotent
aesni_gcm_decrypt:
+ _CET_ENDBR
xor %eax,%eax
ret
.size aesni_gcm_decrypt,.-aesni_gcm_decrypt
diff --git a/crypto/fipsmodule/modes/asm/ghash-ssse3-x86_64.pl b/crypto/fipsmodule/modes/asm/ghash-ssse3-x86_64.pl
index 00364f0..4a850f4 100644
--- a/crypto/fipsmodule/modes/asm/ghash-ssse3-x86_64.pl
+++ b/crypto/fipsmodule/modes/asm/ghash-ssse3-x86_64.pl
@@ -104,6 +104,7 @@
gcm_gmult_ssse3:
.cfi_startproc
.seh_startproc
+ _CET_ENDBR
____
$code .= <<____ if ($win64);
subq \$40, %rsp
@@ -246,6 +247,7 @@
gcm_ghash_ssse3:
.cfi_startproc
.seh_startproc
+ _CET_ENDBR
____
$code .= <<____ if ($win64);
subq \$56, %rsp
diff --git a/crypto/fipsmodule/modes/asm/ghash-x86_64.pl b/crypto/fipsmodule/modes/asm/ghash-x86_64.pl
index 19b18cc..8ad1104 100644
--- a/crypto/fipsmodule/modes/asm/ghash-x86_64.pl
+++ b/crypto/fipsmodule/modes/asm/ghash-x86_64.pl
@@ -206,6 +206,7 @@
gcm_init_clmul:
.cfi_startproc
.seh_startproc
+ _CET_ENDBR
.L_init_clmul:
___
$code.=<<___ if ($win64);
@@ -288,6 +289,7 @@
.align 16
gcm_gmult_clmul:
.cfi_startproc
+ _CET_ENDBR
.L_gmult_clmul:
movdqu ($Xip),$Xi
movdqa .Lbswap_mask(%rip),$T3
@@ -340,6 +342,7 @@
gcm_ghash_clmul:
.cfi_startproc
.seh_startproc
+ _CET_ENDBR
.L_ghash_clmul:
___
$code.=<<___ if ($win64);
@@ -708,6 +711,7 @@
.align 32
gcm_init_avx:
.cfi_startproc
+ _CET_ENDBR
___
if ($avx) {
my ($Htbl,$Xip)=@_4args;
@@ -853,6 +857,7 @@
.align 32
gcm_gmult_avx:
.cfi_startproc
+ _CET_ENDBR
jmp .L_gmult_clmul
.cfi_endproc
.size gcm_gmult_avx,.-gcm_gmult_avx
@@ -864,6 +869,7 @@
.align 32
gcm_ghash_avx:
.cfi_startproc
+ _CET_ENDBR
___
if ($avx) {
my ($Xip,$Htbl,$inp,$len)=@_4args;
diff --git a/crypto/fipsmodule/rand/asm/rdrand-x86_64.pl b/crypto/fipsmodule/rand/asm/rdrand-x86_64.pl
index ac442a9..c0cabe2 100644
--- a/crypto/fipsmodule/rand/asm/rdrand-x86_64.pl
+++ b/crypto/fipsmodule/rand/asm/rdrand-x86_64.pl
@@ -46,6 +46,7 @@
.align 16
CRYPTO_rdrand:
.cfi_startproc
+ _CET_ENDBR
xorq %rax, %rax
rdrand $tmp1
# An add-with-carry of zero effectively sets %rax to the carry flag.
@@ -64,6 +65,7 @@
.align 16
CRYPTO_rdrand_multiple8_buf:
.cfi_startproc
+ _CET_ENDBR
test $len, $len
jz .Lout
movq \$8, $tmp1
diff --git a/crypto/fipsmodule/sha/asm/sha1-x86_64.pl b/crypto/fipsmodule/sha/asm/sha1-x86_64.pl
index d9afacb..c2c8921 100755
--- a/crypto/fipsmodule/sha/asm/sha1-x86_64.pl
+++ b/crypto/fipsmodule/sha/asm/sha1-x86_64.pl
@@ -244,6 +244,7 @@
.align 16
sha1_block_data_order:
.cfi_startproc
+ _CET_ENDBR
leaq OPENSSL_ia32cap_P(%rip),%r10
mov 0(%r10),%r9d
mov 4(%r10),%r8d
diff --git a/crypto/fipsmodule/sha/asm/sha512-x86_64.pl b/crypto/fipsmodule/sha/asm/sha512-x86_64.pl
index e831ae5..35e88d9 100755
--- a/crypto/fipsmodule/sha/asm/sha512-x86_64.pl
+++ b/crypto/fipsmodule/sha/asm/sha512-x86_64.pl
@@ -263,6 +263,7 @@
.align 16
$func:
.cfi_startproc
+ _CET_ENDBR
___
$code.=<<___ if ($SZ==4 || $avx);
leaq OPENSSL_ia32cap_P(%rip),%r11
diff --git a/crypto/hrss/asm/poly_rq_mul.S b/crypto/hrss/asm/poly_rq_mul.S
index 6d8b423..5241953 100644
--- a/crypto/hrss/asm/poly_rq_mul.S
+++ b/crypto/hrss/asm/poly_rq_mul.S
@@ -301,6 +301,7 @@
.att_syntax prefix
poly_Rq_mul:
.cfi_startproc
+_CET_ENDBR
push %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset rbp, -16
diff --git a/crypto/perlasm/x86_64-xlate.pl b/crypto/perlasm/x86_64-xlate.pl
index b998050..e9fc322 100755
--- a/crypto/perlasm/x86_64-xlate.pl
+++ b/crypto/perlasm/x86_64-xlate.pl
@@ -1499,6 +1499,7 @@
\%define XMMWORD
\%define YMMWORD
\%define ZMMWORD
+\%define _CET_ENDBR
\%ifdef BORINGSSL_PREFIX
\%include "boringssl_prefix_symbols_nasm.inc"
diff --git a/crypto/test/asm/trampoline-x86_64.pl b/crypto/test/asm/trampoline-x86_64.pl
index 75c85ec..53b4bcd 100755
--- a/crypto/test/asm/trampoline-x86_64.pl
+++ b/crypto/test/asm/trampoline-x86_64.pl
@@ -141,6 +141,7 @@
abi_test_trampoline:
.cfi_startproc
.seh_startproc
+ _CET_ENDBR
# Stack layout:
# 8 bytes - align
# $caller_state_size bytes - saved caller registers
@@ -307,6 +308,7 @@
.globl abi_test_clobber_r$_
.align 16
abi_test_clobber_r$_:
+ _CET_ENDBR
xorq %r$_, %r$_
ret
.size abi_test_clobber_r$_,.-abi_test_clobber_r$_
@@ -319,6 +321,7 @@
.globl abi_test_clobber_xmm$_
.align 16
abi_test_clobber_xmm$_:
+ _CET_ENDBR
pxor %xmm$_, %xmm$_
ret
.size abi_test_clobber_xmm$_,.-abi_test_clobber_xmm$_
@@ -335,6 +338,7 @@
abi_test_bad_unwind_wrong_register:
.cfi_startproc
.seh_startproc
+ _CET_ENDBR
pushq %r12
.cfi_push %r13 # This should be %r13
.seh_pushreg %r13 # This should be %r13
@@ -358,6 +362,7 @@
abi_test_bad_unwind_temporary:
.cfi_startproc
.seh_startproc
+ _CET_ENDBR
pushq %r12
.cfi_push %r12
.seh_pushreg %r12
@@ -384,6 +389,7 @@
.type abi_test_set_direction_flag, \@abi-omnipotent
.globl abi_test_get_and_clear_direction_flag
abi_test_get_and_clear_direction_flag:
+ _CET_ENDBR
pushfq
popq %rax
andq \$0x400, %rax
@@ -397,6 +403,7 @@
.type abi_test_set_direction_flag, \@abi-omnipotent
.globl abi_test_set_direction_flag
abi_test_set_direction_flag:
+ _CET_ENDBR
std
ret
.size abi_test_set_direction_flag,.-abi_test_set_direction_flag
diff --git a/include/openssl/asm_base.h b/include/openssl/asm_base.h
index 57a7d4a..f22a1a2 100644
--- a/include/openssl/asm_base.h
+++ b/include/openssl/asm_base.h
@@ -33,6 +33,9 @@
//
// - The file, on aarch64, uses the macros defined below to be compatible with
// BTI and PAC.
+//
+// - The file, on X86_64, requires the program to be compatible with Intel IBT
+// and SHSTK.
#if defined(__ASSEMBLER__)
@@ -47,6 +50,22 @@
.popsection
#endif
+#if defined(__CET__) && defined(OPENSSL_X86_64)
+// Clang and GCC define __CET__ and provide <cet.h> when they support Intel's
+// Indirect Branch Tracking.
+// https://lpc.events/event/7/contributions/729/attachments/496/903/CET-LPC-2020.pdf
+//
+// cet.h defines _CET_ENDBR, which is used to mark function entry points for
+// IBT, and adds the assembly marker. The value of _CET_ENDBR is made dependent
+// on whether '-fcf-protection' is passed to the compiler. _CET_ENDBR is only required when
+// the function is the target of an indirect jump, but BoringSSL chooses to mark
+// all assembly entry points because it is easier, and allows BoringSSL's ABI
+// tester to call the assembly entry points via an indirect jump.
+#include <cet.h>
+#else
+#define _CET_ENDBR
+#endif
+
#if defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)
// We require the ARM assembler provide |__ARM_ARCH| from Arm C Language
diff --git a/third_party/fiat/asm/fiat_curve25519_adx_mul.S b/third_party/fiat/asm/fiat_curve25519_adx_mul.S
index dd7c225..b5d2aae 100644
--- a/third_party/fiat/asm/fiat_curve25519_adx_mul.S
+++ b/third_party/fiat/asm/fiat_curve25519_adx_mul.S
@@ -17,6 +17,7 @@
#endif
.cfi_startproc
+_CET_ENDBR
mov [rsp - 0x08], rbp
.cfi_offset rbp, -8-0x08
mov rbp, rsp
diff --git a/third_party/fiat/asm/fiat_curve25519_adx_square.S b/third_party/fiat/asm/fiat_curve25519_adx_square.S
index 13fa8a8..0b876ab 100644
--- a/third_party/fiat/asm/fiat_curve25519_adx_square.S
+++ b/third_party/fiat/asm/fiat_curve25519_adx_square.S
@@ -17,6 +17,7 @@
#endif
.cfi_startproc
+_CET_ENDBR
mov [rsp - 0x08], rbp
.cfi_offset rbp, -8-0x08
mov rbp, rsp