Add Intel Indirect Branch Tracking support.

This allows operating systems to insist on IBT
enforcement as an exploit mitigation mechanism without
needing to make an exception for anything using a
bundled boringssl, such as chrome, mono, and qtwebengine.

Change-Id: Iac28dd3d2af177b89ffde10ae97bce23739feb94
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/60625
Reviewed-by: Adam Langley <agl@google.com>
Commit-Queue: Bob Beck <bbe@google.com>
Reviewed-by: David Benjamin <davidben@google.com>
diff --git a/crypto/chacha/asm/chacha-x86_64.pl b/crypto/chacha/asm/chacha-x86_64.pl
index 31384de..418044c 100755
--- a/crypto/chacha/asm/chacha-x86_64.pl
+++ b/crypto/chacha/asm/chacha-x86_64.pl
@@ -231,6 +231,7 @@
 .align	64
 ChaCha20_ctr32:
 .cfi_startproc
+	_CET_ENDBR
 	cmp	\$0,$len
 	je	.Lno_data
 	mov	OPENSSL_ia32cap_P+4(%rip),%r10
diff --git a/crypto/cipher_extra/asm/aes128gcmsiv-x86_64.pl b/crypto/cipher_extra/asm/aes128gcmsiv-x86_64.pl
index f7d419b..e044259 100644
--- a/crypto/cipher_extra/asm/aes128gcmsiv-x86_64.pl
+++ b/crypto/cipher_extra/asm/aes128gcmsiv-x86_64.pl
@@ -134,6 +134,7 @@
 .align 16
 aesgcmsiv_htable_init:
 .cfi_startproc
+    _CET_ENDBR
     vmovdqa ($H), $T
     vmovdqa $T, $TMP0
     vmovdqa $T, ($Htbl)      # H
@@ -174,6 +175,7 @@
 .align 16
 aesgcmsiv_htable6_init:
 .cfi_startproc
+    _CET_ENDBR
     vmovdqa ($H), $T
     vmovdqa $T, $TMP0
     vmovdqa $T, ($Htbl)      # H
@@ -235,6 +237,7 @@
 .align 16
 aesgcmsiv_htable_polyval:
 .cfi_startproc
+    _CET_ENDBR
     test  $len, $len
     jnz   .Lhtable_polyval_start
     ret
@@ -420,6 +423,7 @@
 .align 16
 aesgcmsiv_polyval_horner:
 .cfi_startproc
+    _CET_ENDBR
     test $L, $L
     jnz .Lpolyval_horner_start
     ret
@@ -460,6 +464,7 @@
 .align 16
 aes128gcmsiv_aes_ks:
 .cfi_startproc
+    _CET_ENDBR
     vmovdqu (%rdi), %xmm1           # xmm1 = user key
     vmovdqa %xmm1, (%rsi)           # rsi points to output
 
@@ -521,6 +526,7 @@
 .align 16
 aes256gcmsiv_aes_ks:
 .cfi_startproc
+    _CET_ENDBR
     vmovdqu (%rdi), %xmm1
     vmovdqu 16(%rdi), %xmm3
     vmovdqa %xmm1, (%rsi)
@@ -614,6 +620,7 @@
 .align 16
 aes128gcmsiv_aes_ks_enc_x1:
 .cfi_startproc
+    _CET_ENDBR
     vmovdqa (%rcx), %xmm1                 # xmm1 = first 16 bytes of random key
     vmovdqa 0*16(%rdi), $BLOCK1
 
@@ -687,6 +694,7 @@
 .align 16
 aes128gcmsiv_kdf:
 .cfi_startproc
+    _CET_ENDBR
 # parameter 1: %rdi                         Pointer to NONCE
 # parameter 2: %rsi                         Pointer to CT
 # parameter 4: %rdx                         Pointer to keys
@@ -787,6 +795,7 @@
 .align 16
 aes128gcmsiv_enc_msg_x4:
 .cfi_startproc
+    _CET_ENDBR
     test $LEN, $LEN
     jnz .L128_enc_msg_x4_start
     ret
@@ -984,6 +993,7 @@
 .align 16
 aes128gcmsiv_enc_msg_x8:
 .cfi_startproc
+    _CET_ENDBR
     test $LEN, $LEN
     jnz .L128_enc_msg_x8_start
     ret
@@ -1239,6 +1249,7 @@
 
   $code.=<<___;
 .cfi_startproc
+    _CET_ENDBR
     test \$~15, $LEN
     jnz .L${labelPrefix}_dec_start
     ret
@@ -1578,6 +1589,7 @@
 .align 16
 aes128gcmsiv_ecb_enc_block:
 .cfi_startproc
+    _CET_ENDBR
     vmovdqa (%rdi), $STATE_1
 
     vpxor       ($KSp), $STATE_1, $STATE_1
@@ -1670,6 +1682,7 @@
 .align 16
 aes256gcmsiv_aes_ks_enc_x1:
 .cfi_startproc
+    _CET_ENDBR
     vmovdqa con1(%rip), $CON_MASK    # CON_MASK  = 1,1,1,1
     vmovdqa mask(%rip), $MASK_256    # MASK_256
     vmovdqa ($PT), $BLOCK1
@@ -1711,6 +1724,7 @@
 .align 16
 aes256gcmsiv_ecb_enc_block:
 .cfi_startproc
+    _CET_ENDBR
     vmovdqa (%rdi), $STATE_1
     vpxor ($KSp), $STATE_1, $STATE_1
     vaesenc 1*16($KSp), $STATE_1, $STATE_1
@@ -1794,6 +1808,7 @@
 .align 16
 aes256gcmsiv_enc_msg_x4:
 .cfi_startproc
+    _CET_ENDBR
     test $LEN, $LEN
     jnz .L256_enc_msg_x4_start
     ret
@@ -1994,6 +2009,7 @@
 .align 16
 aes256gcmsiv_enc_msg_x8:
 .cfi_startproc
+    _CET_ENDBR
     test $LEN, $LEN
     jnz .L256_enc_msg_x8_start
     ret
@@ -2200,6 +2216,7 @@
 .align 16
 aes256gcmsiv_kdf:
 .cfi_startproc
+    _CET_ENDBR
 # parameter 1: %rdi                         Pointer to NONCE
 # parameter 2: %rsi                         Pointer to CT
 # parameter 4: %rdx                         Pointer to keys
diff --git a/crypto/cipher_extra/asm/chacha20_poly1305_x86_64.pl b/crypto/cipher_extra/asm/chacha20_poly1305_x86_64.pl
index f0430c3..fb11760 100644
--- a/crypto/cipher_extra/asm/chacha20_poly1305_x86_64.pl
+++ b/crypto/cipher_extra/asm/chacha20_poly1305_x86_64.pl
@@ -449,6 +449,7 @@
 .align 64
 chacha20_poly1305_open:
 .cfi_startproc
+    _CET_ENDBR
     push %rbp
 .cfi_push %rbp
     push %rbx
@@ -871,6 +872,7 @@
 .align 64
 chacha20_poly1305_seal:
 .cfi_startproc
+    _CET_ENDBR
     push %rbp
 .cfi_push %rbp
     push %rbx
diff --git a/crypto/fipsmodule/aes/asm/aesni-x86_64.pl b/crypto/fipsmodule/aes/asm/aesni-x86_64.pl
index 215611f..414b76b 100644
--- a/crypto/fipsmodule/aes/asm/aesni-x86_64.pl
+++ b/crypto/fipsmodule/aes/asm/aesni-x86_64.pl
@@ -275,6 +275,7 @@
 .align	16
 ${PREFIX}_encrypt:
 .cfi_startproc
+	_CET_ENDBR
 #ifdef BORINGSSL_DISPATCH_TEST
 .extern	BORINGSSL_function_hit
 	movb \$1,BORINGSSL_function_hit+1(%rip)
@@ -297,6 +298,7 @@
 .align	16
 ${PREFIX}_decrypt:
 .cfi_startproc
+	_CET_ENDBR
 	movups	($inp),$inout0		# load input
 	mov	240($key),$rounds	# key->rounds
 ___
@@ -617,6 +619,7 @@
 .align	16
 ${PREFIX}_ecb_encrypt:
 .cfi_startproc
+	_CET_ENDBR
 ___
 $code.=<<___ if ($win64);
 	lea	-0x58(%rsp),%rsp
@@ -1203,6 +1206,7 @@
 .align	16
 ${PREFIX}_ctr32_encrypt_blocks:
 .cfi_startproc
+	_CET_ENDBR
 #ifdef BORINGSSL_DISPATCH_TEST
 	movb \$1,BORINGSSL_function_hit(%rip)
 #endif
@@ -1781,6 +1785,7 @@
 .align	16
 ${PREFIX}_xts_encrypt:
 .cfi_startproc
+	_CET_ENDBR
 	lea	(%rsp),%r11			# frame pointer
 .cfi_def_cfa_register	%r11
 	push	%rbp
@@ -2264,6 +2269,7 @@
 .align	16
 ${PREFIX}_xts_decrypt:
 .cfi_startproc
+	_CET_ENDBR
 	lea	(%rsp),%r11			# frame pointer
 .cfi_def_cfa_register	%r11
 	push	%rbp
@@ -2782,6 +2788,7 @@
 .align	16
 ${PREFIX}_cbc_encrypt:
 .cfi_startproc
+	_CET_ENDBR
 	test	$len,$len		# check length
 	jz	.Lcbc_ret
 
@@ -3331,6 +3338,7 @@
 .align	16
 ${PREFIX}_set_decrypt_key:
 .cfi_startproc
+	_CET_ENDBR
 	.byte	0x48,0x83,0xEC,0x08	# sub rsp,8
 .cfi_adjust_cfa_offset	8
 	call	__aesni_set_encrypt_key
@@ -3403,6 +3411,7 @@
 ${PREFIX}_set_encrypt_key:
 __aesni_set_encrypt_key:
 .cfi_startproc
+	_CET_ENDBR
 #ifdef BORINGSSL_DISPATCH_TEST
 	movb \$1,BORINGSSL_function_hit+3(%rip)
 #endif
diff --git a/crypto/fipsmodule/aes/asm/vpaes-x86_64.pl b/crypto/fipsmodule/aes/asm/vpaes-x86_64.pl
index 37e93d7..4d0d38f 100644
--- a/crypto/fipsmodule/aes/asm/vpaes-x86_64.pl
+++ b/crypto/fipsmodule/aes/asm/vpaes-x86_64.pl
@@ -871,6 +871,7 @@
 .align	16
 ${PREFIX}_set_encrypt_key:
 .cfi_startproc
+	_CET_ENDBR
 #ifdef BORINGSSL_DISPATCH_TEST
 .extern        BORINGSSL_function_hit
        movb \$1, BORINGSSL_function_hit+5(%rip)
@@ -926,6 +927,7 @@
 .align	16
 ${PREFIX}_set_decrypt_key:
 .cfi_startproc
+	_CET_ENDBR
 ___
 $code.=<<___ if ($win64);
 	lea	-0xb8(%rsp),%rsp
@@ -981,6 +983,7 @@
 .align	16
 ${PREFIX}_encrypt:
 .cfi_startproc
+	_CET_ENDBR
 #ifdef BORINGSSL_DISPATCH_TEST
 .extern        BORINGSSL_function_hit
        movb \$1, BORINGSSL_function_hit+4(%rip)
@@ -1030,6 +1033,7 @@
 .align	16
 ${PREFIX}_decrypt:
 .cfi_startproc
+	_CET_ENDBR
 ___
 $code.=<<___ if ($win64);
 	lea	-0xb8(%rsp),%rsp
@@ -1081,6 +1085,7 @@
 .align	16
 ${PREFIX}_cbc_encrypt:
 .cfi_startproc
+	_CET_ENDBR
 	xchg	$key,$len
 ___
 ($len,$key)=($key,$len);
@@ -1166,6 +1171,7 @@
 .align	16
 ${PREFIX}_ctr32_encrypt_blocks:
 .cfi_startproc
+	_CET_ENDBR
 	# _vpaes_encrypt_core and _vpaes_encrypt_core_2x expect the key in %rdx.
 	xchg	$key, $blocks
 ___
diff --git a/crypto/fipsmodule/bn/asm/rsaz-avx2.pl b/crypto/fipsmodule/bn/asm/rsaz-avx2.pl
index 9be0b14..733722c 100755
--- a/crypto/fipsmodule/bn/asm/rsaz-avx2.pl
+++ b/crypto/fipsmodule/bn/asm/rsaz-avx2.pl
@@ -112,6 +112,7 @@
 .align	64
 rsaz_1024_sqr_avx2:		# 702 cycles, 14% faster than rsaz_1024_mul_avx2
 .cfi_startproc
+	_CET_ENDBR
 	lea	(%rsp), %rax
 .cfi_def_cfa_register	%rax
 	push	%rbx
@@ -863,6 +864,7 @@
 .align	64
 rsaz_1024_mul_avx2:
 .cfi_startproc
+	_CET_ENDBR
 	lea	(%rsp), %rax
 .cfi_def_cfa_register	%rax
 	push	%rbx
@@ -1474,6 +1476,7 @@
 .align	32
 rsaz_1024_red2norm_avx2:
 .cfi_startproc
+	_CET_ENDBR
 	sub	\$-128,$inp	# size optimization
 	xor	%rax,%rax
 ___
@@ -1515,6 +1518,7 @@
 .align	32
 rsaz_1024_norm2red_avx2:
 .cfi_startproc
+	_CET_ENDBR
 	sub	\$-128,$out	# size optimization
 	mov	($inp),@T[0]
 	mov	\$0x1fffffff,%eax
@@ -1559,6 +1563,7 @@
 .align	32
 rsaz_1024_scatter5_avx2:
 .cfi_startproc
+	_CET_ENDBR
 	vzeroupper
 	vmovdqu	.Lscatter_permd(%rip),%ymm5
 	shl	\$4,$power
@@ -1586,6 +1591,7 @@
 .align	32
 rsaz_1024_gather5_avx2:
 .cfi_startproc
+	_CET_ENDBR
 	vzeroupper
 	mov	%rsp,%r11
 .cfi_def_cfa_register	%r11
diff --git a/crypto/fipsmodule/bn/asm/x86_64-mont.pl b/crypto/fipsmodule/bn/asm/x86_64-mont.pl
index 8b34ae3..be4c69b 100755
--- a/crypto/fipsmodule/bn/asm/x86_64-mont.pl
+++ b/crypto/fipsmodule/bn/asm/x86_64-mont.pl
@@ -92,6 +92,7 @@
 .align	16
 bn_mul_mont:
 .cfi_startproc
+	_CET_ENDBR
 	mov	${num}d,${num}d
 	mov	%rsp,%rax
 .cfi_def_cfa_register	%rax
diff --git a/crypto/fipsmodule/bn/asm/x86_64-mont5.pl b/crypto/fipsmodule/bn/asm/x86_64-mont5.pl
index 67ffc4a..88d98af 100755
--- a/crypto/fipsmodule/bn/asm/x86_64-mont5.pl
+++ b/crypto/fipsmodule/bn/asm/x86_64-mont5.pl
@@ -79,6 +79,7 @@
 .align	64
 bn_mul_mont_gather5:
 .cfi_startproc
+	_CET_ENDBR
 	mov	${num}d,${num}d
 	mov	%rsp,%rax
 .cfi_def_cfa_register	%rax
@@ -1098,6 +1099,7 @@
 .align	32
 bn_power5:
 .cfi_startproc
+	_CET_ENDBR
 	mov	%rsp,%rax
 .cfi_def_cfa_register	%rax
 ___
@@ -1240,6 +1242,7 @@
 bn_sqr8x_internal:
 __bn_sqr8x_internal:
 .cfi_startproc
+	_CET_ENDBR
 	##############################################################
 	# Squaring part:
 	#
@@ -2737,6 +2740,7 @@
 bn_sqrx8x_internal:
 __bn_sqrx8x_internal:
 .cfi_startproc
+	_CET_ENDBR
 	##################################################################
 	# Squaring part:
 	#
@@ -3448,6 +3452,7 @@
 .align	16
 bn_scatter5:
 .cfi_startproc
+	_CET_ENDBR
 	cmp	\$0, $num
 	jz	.Lscatter_epilogue
 
@@ -3478,6 +3483,7 @@
 bn_gather5:
 .cfi_startproc
 .LSEH_begin_bn_gather5:			# Win64 thing, but harmless in other cases
+	_CET_ENDBR
 	# I can't trust assembler to use specific encoding:-(
 	.byte	0x4c,0x8d,0x14,0x24			#lea    (%rsp),%r10
 .cfi_def_cfa_register	%r10
diff --git a/crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl b/crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl
index 0701996..b6e0384 100755
--- a/crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl
+++ b/crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl
@@ -98,6 +98,7 @@
 .align	32
 ecp_nistz256_neg:
 .cfi_startproc
+	_CET_ENDBR
 	push	%r12
 .cfi_push	%r12
 	push	%r13
@@ -166,6 +167,7 @@
 .align	32
 ecp_nistz256_ord_mul_mont:
 .cfi_startproc
+	_CET_ENDBR
 ___
 $code.=<<___	if ($addx);
 	leaq	OPENSSL_ia32cap_P(%rip), %rcx
@@ -497,6 +499,7 @@
 .align	32
 ecp_nistz256_ord_sqr_mont:
 .cfi_startproc
+	_CET_ENDBR
 ___
 $code.=<<___	if ($addx);
 	leaq	OPENSSL_ia32cap_P(%rip), %rcx
@@ -1247,6 +1250,7 @@
 .align	32
 ecp_nistz256_mul_mont:
 .cfi_startproc
+	_CET_ENDBR
 ___
 $code.=<<___	if ($addx);
 	leaq	OPENSSL_ia32cap_P(%rip), %rcx
@@ -1549,6 +1553,7 @@
 .align	32
 ecp_nistz256_sqr_mont:
 .cfi_startproc
+	_CET_ENDBR
 ___
 $code.=<<___	if ($addx);
 	leaq	OPENSSL_ia32cap_P(%rip), %rcx
@@ -2098,6 +2103,7 @@
 .align	32
 ecp_nistz256_select_w5:
 .cfi_startproc
+	_CET_ENDBR
 ___
 $code.=<<___	if ($avx>1);
 	leaq	OPENSSL_ia32cap_P(%rip), %rax
@@ -2198,6 +2204,7 @@
 .align	32
 ecp_nistz256_select_w7:
 .cfi_startproc
+	_CET_ENDBR
 ___
 $code.=<<___	if ($avx>1);
 	leaq	OPENSSL_ia32cap_P(%rip), %rax
@@ -2403,6 +2410,7 @@
 ecp_nistz256_avx2_select_w7:
 .cfi_startproc
 .Lavx2_select_w7:
+	_CET_ENDBR
 	vzeroupper
 ___
 $code.=<<___	if ($win64);
@@ -2514,6 +2522,7 @@
 .type	ecp_nistz256_avx2_select_w7,\@function,3
 .align	32
 ecp_nistz256_avx2_select_w7:
+	_CET_ENDBR
 	.byte	0x0f,0x0b	# ud2
 	ret
 .size	ecp_nistz256_avx2_select_w7,.-ecp_nistz256_avx2_select_w7
@@ -2718,6 +2727,7 @@
 .align	32
 ecp_nistz256_point_double:
 .cfi_startproc
+	_CET_ENDBR
 ___
 $code.=<<___	if ($addx);
 	leaq	OPENSSL_ia32cap_P(%rip), %rcx
@@ -2970,6 +2980,7 @@
 .align	32
 ecp_nistz256_point_add:
 .cfi_startproc
+	_CET_ENDBR
 ___
 $code.=<<___	if ($addx);
 	leaq	OPENSSL_ia32cap_P(%rip), %rcx
@@ -3368,6 +3379,7 @@
 .align	32
 ecp_nistz256_point_add_affine:
 .cfi_startproc
+	_CET_ENDBR
 ___
 $code.=<<___	if ($addx);
 	leaq	OPENSSL_ia32cap_P(%rip), %rcx
diff --git a/crypto/fipsmodule/ec/asm/p256_beeu-x86_64-asm.pl b/crypto/fipsmodule/ec/asm/p256_beeu-x86_64-asm.pl
index b9ec96d..1600d4c 100644
--- a/crypto/fipsmodule/ec/asm/p256_beeu-x86_64-asm.pl
+++ b/crypto/fipsmodule/ec/asm/p256_beeu-x86_64-asm.pl
@@ -154,6 +154,7 @@
 .align 32
 beeu_mod_inverse_vartime:
 .cfi_startproc
+    _CET_ENDBR
     push %rbp
 .cfi_push rbp
     push %r12
diff --git a/crypto/fipsmodule/md5/asm/md5-x86_64.pl b/crypto/fipsmodule/md5/asm/md5-x86_64.pl
index 4c96bba..655ea09 100644
--- a/crypto/fipsmodule/md5/asm/md5-x86_64.pl
+++ b/crypto/fipsmodule/md5/asm/md5-x86_64.pl
@@ -131,6 +131,7 @@
 .type md5_block_asm_data_order,\@function,3
 md5_block_asm_data_order:
 .cfi_startproc
+	_CET_ENDBR
 	push	%rbp
 .cfi_push	rbp
 	push	%rbx
diff --git a/crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl b/crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl
index 7c235f8..2530213 100644
--- a/crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl
+++ b/crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl
@@ -442,6 +442,7 @@
 aesni_gcm_decrypt:
 .cfi_startproc
 .seh_startproc
+	_CET_ENDBR
 	xor	%rax,%rax
 
 	# We call |_aesni_ctr32_ghash_6x|, which requires at least 96 (0x60)
@@ -715,6 +716,7 @@
 aesni_gcm_encrypt:
 .cfi_startproc
 .seh_startproc
+	_CET_ENDBR
 #ifdef BORINGSSL_DISPATCH_TEST
 .extern	BORINGSSL_function_hit
 	movb \$1,BORINGSSL_function_hit+2(%rip)
@@ -1089,6 +1091,7 @@
 .globl	aesni_gcm_encrypt
 .type	aesni_gcm_encrypt,\@abi-omnipotent
 aesni_gcm_encrypt:
+	_CET_ENDBR
 	xor	%eax,%eax
 	ret
 .size	aesni_gcm_encrypt,.-aesni_gcm_encrypt
@@ -1096,6 +1099,7 @@
 .globl	aesni_gcm_decrypt
 .type	aesni_gcm_decrypt,\@abi-omnipotent
 aesni_gcm_decrypt:
+	_CET_ENDBR
 	xor	%eax,%eax
 	ret
 .size	aesni_gcm_decrypt,.-aesni_gcm_decrypt
diff --git a/crypto/fipsmodule/modes/asm/ghash-ssse3-x86_64.pl b/crypto/fipsmodule/modes/asm/ghash-ssse3-x86_64.pl
index 00364f0..4a850f4 100644
--- a/crypto/fipsmodule/modes/asm/ghash-ssse3-x86_64.pl
+++ b/crypto/fipsmodule/modes/asm/ghash-ssse3-x86_64.pl
@@ -104,6 +104,7 @@
 gcm_gmult_ssse3:
 .cfi_startproc
 .seh_startproc
+	_CET_ENDBR
 ____
 $code .= <<____ if ($win64);
 	subq	\$40, %rsp
@@ -246,6 +247,7 @@
 gcm_ghash_ssse3:
 .cfi_startproc
 .seh_startproc
+	_CET_ENDBR
 ____
 $code .= <<____ if ($win64);
 	subq	\$56, %rsp
diff --git a/crypto/fipsmodule/modes/asm/ghash-x86_64.pl b/crypto/fipsmodule/modes/asm/ghash-x86_64.pl
index 19b18cc..8ad1104 100644
--- a/crypto/fipsmodule/modes/asm/ghash-x86_64.pl
+++ b/crypto/fipsmodule/modes/asm/ghash-x86_64.pl
@@ -206,6 +206,7 @@
 gcm_init_clmul:
 .cfi_startproc
 .seh_startproc
+	_CET_ENDBR
 .L_init_clmul:
 ___
 $code.=<<___ if ($win64);
@@ -288,6 +289,7 @@
 .align	16
 gcm_gmult_clmul:
 .cfi_startproc
+	_CET_ENDBR
 .L_gmult_clmul:
 	movdqu		($Xip),$Xi
 	movdqa		.Lbswap_mask(%rip),$T3
@@ -340,6 +342,7 @@
 gcm_ghash_clmul:
 .cfi_startproc
 .seh_startproc
+	_CET_ENDBR
 .L_ghash_clmul:
 ___
 $code.=<<___ if ($win64);
@@ -708,6 +711,7 @@
 .align	32
 gcm_init_avx:
 .cfi_startproc
+	_CET_ENDBR
 ___
 if ($avx) {
 my ($Htbl,$Xip)=@_4args;
@@ -853,6 +857,7 @@
 .align	32
 gcm_gmult_avx:
 .cfi_startproc
+	_CET_ENDBR
 	jmp	.L_gmult_clmul
 .cfi_endproc
 .size	gcm_gmult_avx,.-gcm_gmult_avx
@@ -864,6 +869,7 @@
 .align	32
 gcm_ghash_avx:
 .cfi_startproc
+	_CET_ENDBR
 ___
 if ($avx) {
 my ($Xip,$Htbl,$inp,$len)=@_4args;
diff --git a/crypto/fipsmodule/rand/asm/rdrand-x86_64.pl b/crypto/fipsmodule/rand/asm/rdrand-x86_64.pl
index ac442a9..c0cabe2 100644
--- a/crypto/fipsmodule/rand/asm/rdrand-x86_64.pl
+++ b/crypto/fipsmodule/rand/asm/rdrand-x86_64.pl
@@ -46,6 +46,7 @@
 .align	16
 CRYPTO_rdrand:
 .cfi_startproc
+	_CET_ENDBR
 	xorq %rax, %rax
 	rdrand $tmp1
 	# An add-with-carry of zero effectively sets %rax to the carry flag.
@@ -64,6 +65,7 @@
 .align 16
 CRYPTO_rdrand_multiple8_buf:
 .cfi_startproc
+	_CET_ENDBR
 	test $len, $len
 	jz .Lout
 	movq \$8, $tmp1
diff --git a/crypto/fipsmodule/sha/asm/sha1-x86_64.pl b/crypto/fipsmodule/sha/asm/sha1-x86_64.pl
index d9afacb..c2c8921 100755
--- a/crypto/fipsmodule/sha/asm/sha1-x86_64.pl
+++ b/crypto/fipsmodule/sha/asm/sha1-x86_64.pl
@@ -244,6 +244,7 @@
 .align	16
 sha1_block_data_order:
 .cfi_startproc
+	_CET_ENDBR
 	leaq	OPENSSL_ia32cap_P(%rip),%r10
 	mov	0(%r10),%r9d
 	mov	4(%r10),%r8d
diff --git a/crypto/fipsmodule/sha/asm/sha512-x86_64.pl b/crypto/fipsmodule/sha/asm/sha512-x86_64.pl
index e831ae5..35e88d9 100755
--- a/crypto/fipsmodule/sha/asm/sha512-x86_64.pl
+++ b/crypto/fipsmodule/sha/asm/sha512-x86_64.pl
@@ -263,6 +263,7 @@
 .align	16
 $func:
 .cfi_startproc
+	_CET_ENDBR
 ___
 $code.=<<___ if ($SZ==4 || $avx);
 	leaq	OPENSSL_ia32cap_P(%rip),%r11
diff --git a/crypto/hrss/asm/poly_rq_mul.S b/crypto/hrss/asm/poly_rq_mul.S
index 6d8b423..5241953 100644
--- a/crypto/hrss/asm/poly_rq_mul.S
+++ b/crypto/hrss/asm/poly_rq_mul.S
@@ -301,6 +301,7 @@
 .att_syntax prefix
 poly_Rq_mul:
 .cfi_startproc
+_CET_ENDBR
 push %rbp
 .cfi_adjust_cfa_offset 8
 .cfi_offset rbp, -16
diff --git a/crypto/perlasm/x86_64-xlate.pl b/crypto/perlasm/x86_64-xlate.pl
index b998050..e9fc322 100755
--- a/crypto/perlasm/x86_64-xlate.pl
+++ b/crypto/perlasm/x86_64-xlate.pl
@@ -1499,6 +1499,7 @@
 \%define XMMWORD
 \%define YMMWORD
 \%define ZMMWORD
+\%define _CET_ENDBR
 
 \%ifdef BORINGSSL_PREFIX
 \%include "boringssl_prefix_symbols_nasm.inc"
diff --git a/crypto/test/asm/trampoline-x86_64.pl b/crypto/test/asm/trampoline-x86_64.pl
index 75c85ec..53b4bcd 100755
--- a/crypto/test/asm/trampoline-x86_64.pl
+++ b/crypto/test/asm/trampoline-x86_64.pl
@@ -141,6 +141,7 @@
 abi_test_trampoline:
 .cfi_startproc
 .seh_startproc
+	_CET_ENDBR
 	# Stack layout:
 	#   8 bytes - align
 	#   $caller_state_size bytes - saved caller registers
@@ -307,6 +308,7 @@
 .globl	abi_test_clobber_r$_
 .align	16
 abi_test_clobber_r$_:
+	_CET_ENDBR
 	xorq	%r$_, %r$_
 	ret
 .size	abi_test_clobber_r$_,.-abi_test_clobber_r$_
@@ -319,6 +321,7 @@
 .globl	abi_test_clobber_xmm$_
 .align	16
 abi_test_clobber_xmm$_:
+	_CET_ENDBR
 	pxor	%xmm$_, %xmm$_
 	ret
 .size	abi_test_clobber_xmm$_,.-abi_test_clobber_xmm$_
@@ -335,6 +338,7 @@
 abi_test_bad_unwind_wrong_register:
 .cfi_startproc
 .seh_startproc
+	_CET_ENDBR
 	pushq	%r12
 .cfi_push	%r13	# This should be %r13
 .seh_pushreg	%r13	# This should be %r13
@@ -358,6 +362,7 @@
 abi_test_bad_unwind_temporary:
 .cfi_startproc
 .seh_startproc
+	_CET_ENDBR
 	pushq	%r12
 .cfi_push	%r12
 .seh_pushreg	%r12
@@ -384,6 +389,7 @@
 .type	abi_test_set_direction_flag, \@abi-omnipotent
 .globl	abi_test_get_and_clear_direction_flag
 abi_test_get_and_clear_direction_flag:
+	_CET_ENDBR
 	pushfq
 	popq	%rax
 	andq	\$0x400, %rax
@@ -397,6 +403,7 @@
 .type	abi_test_set_direction_flag, \@abi-omnipotent
 .globl	abi_test_set_direction_flag
 abi_test_set_direction_flag:
+	_CET_ENDBR
 	std
 	ret
 .size abi_test_set_direction_flag,.-abi_test_set_direction_flag
diff --git a/include/openssl/asm_base.h b/include/openssl/asm_base.h
index 57a7d4a..f22a1a2 100644
--- a/include/openssl/asm_base.h
+++ b/include/openssl/asm_base.h
@@ -33,6 +33,9 @@
 //
 // - The file, on aarch64, uses the macros defined below to be compatible with
 //   BTI and PAC.
+//
+// - The file, on x86_64, requires the program to be compatible with Intel IBT
+//   and SHSTK.
 
 #if defined(__ASSEMBLER__)
 
@@ -47,6 +50,22 @@
 .popsection
 #endif
 
+#if defined(__CET__) && defined(OPENSSL_X86_64)
+// Clang and GCC define __CET__ and provide <cet.h> when they support Intel's
+// Indirect Branch Tracking.
+// https://lpc.events/event/7/contributions/729/attachments/496/903/CET-LPC-2020.pdf
+//
+// <cet.h> defines _CET_ENDBR, which is used to mark function entry points for
+// IBT by adding the assembly marker. The value of _CET_ENDBR depends on whether
+// '-fcf-protection' is passed to the compiler. _CET_ENDBR is only required when
+// the function is the target of an indirect jump, but BoringSSL chooses to mark
+// all assembly entry points because it is easier, and allows BoringSSL's ABI
+// tester to call the assembly entry points via an indirect jump.
+#include <cet.h>
+#else
+#define _CET_ENDBR
+#endif
+
 #if defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)
 
 // We require the ARM assembler provide |__ARM_ARCH| from Arm C Language
diff --git a/third_party/fiat/asm/fiat_curve25519_adx_mul.S b/third_party/fiat/asm/fiat_curve25519_adx_mul.S
index dd7c225..b5d2aae 100644
--- a/third_party/fiat/asm/fiat_curve25519_adx_mul.S
+++ b/third_party/fiat/asm/fiat_curve25519_adx_mul.S
@@ -17,6 +17,7 @@
 #endif
 
 .cfi_startproc
+_CET_ENDBR
 mov [rsp - 0x08], rbp
 .cfi_offset rbp, -8-0x08
 mov rbp, rsp
diff --git a/third_party/fiat/asm/fiat_curve25519_adx_square.S b/third_party/fiat/asm/fiat_curve25519_adx_square.S
index 13fa8a8..0b876ab 100644
--- a/third_party/fiat/asm/fiat_curve25519_adx_square.S
+++ b/third_party/fiat/asm/fiat_curve25519_adx_square.S
@@ -17,6 +17,7 @@
 #endif
 
 .cfi_startproc
+_CET_ENDBR
 mov [rsp - 0x08], rbp
 .cfi_offset rbp, -8-0x08
 mov rbp, rsp