Annotate leaf functions with .cfi_{startproc,endproc}

While gdb can figure it out, libunwind requires CFI directives to
unwind a leaf function, even though the directives are trivial.
Adding them matches what GCC outputs, and gdb likely has heuristics that
less complex tools (e.g. profilers) may not have.
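
For reference (not part of this change), the annotation a leaf function
needs is just the bracketing pair: such a function never pushes registers
or adjusts %rsp, so the initial CFA rule established by .cfi_startproc
describes the whole body. A minimal sketch in plain GAS syntax, with a
hypothetical function name:

    .type	leaf_example,@function
    .align	32
    leaf_example:
    .cfi_startproc
    	# No pushes and no %rsp adjustment, so the default CFA (%rsp+8,
    	# return address at CFA-8) holds until the matching .cfi_endproc;
    	# no further directives are needed.
    	lea	(%rdi,%rsi), %rax
    	ret
    .cfi_endproc
    .size	leaf_example,.-leaf_example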

Bug: 181
Change-Id: I25c72152de33109a29710a828aeb99c608dd0470
Reviewed-on: https://boringssl-review.googlesource.com/c/33964
Commit-Queue: David Benjamin <davidben@google.com>
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl b/crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl
index ab62a28..2f5ae86 100755
--- a/crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl
+++ b/crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl
@@ -1320,6 +1320,7 @@
 .type	__ecp_nistz256_mul_montq,\@abi-omnipotent
 .align	32
 __ecp_nistz256_mul_montq:
+.cfi_startproc
 	########################################################################
 	# Multiply a by b[0]
 	mov	%rax, $t1
@@ -1531,6 +1532,7 @@
 	mov	$acc1, 8*3($r_ptr)
 
 	ret
+.cfi_endproc
 .size	__ecp_nistz256_mul_montq,.-__ecp_nistz256_mul_montq
 
 ################################################################################
@@ -1615,6 +1617,7 @@
 .type	__ecp_nistz256_sqr_montq,\@abi-omnipotent
 .align	32
 __ecp_nistz256_sqr_montq:
+.cfi_startproc
 	mov	%rax, $acc5
 	mulq	$acc6			# a[1]*a[0]
 	mov	%rax, $acc1
@@ -1772,6 +1775,7 @@
 	mov	$acc7, 8*3($r_ptr)
 
 	ret
+.cfi_endproc
 .size	__ecp_nistz256_sqr_montq,.-__ecp_nistz256_sqr_montq
 ___
 
@@ -1780,6 +1784,7 @@
 .type	__ecp_nistz256_mul_montx,\@abi-omnipotent
 .align	32
 __ecp_nistz256_mul_montx:
+.cfi_startproc
 	########################################################################
 	# Multiply by b[0]
 	mulx	$acc1, $acc0, $acc1
@@ -1942,11 +1947,13 @@
 	mov	$acc1, 8*3($r_ptr)
 
 	ret
+.cfi_endproc
 .size	__ecp_nistz256_mul_montx,.-__ecp_nistz256_mul_montx
 
 .type	__ecp_nistz256_sqr_montx,\@abi-omnipotent
 .align	32
 __ecp_nistz256_sqr_montx:
+.cfi_startproc
 	mulx	$acc6, $acc1, $acc2	# a[0]*a[1]
 	mulx	$acc7, $t0, $acc3	# a[0]*a[2]
 	xor	%eax, %eax
@@ -2070,6 +2077,7 @@
 	mov	$acc7, 8*3($r_ptr)
 
 	ret
+.cfi_endproc
 .size	__ecp_nistz256_sqr_montx,.-__ecp_nistz256_sqr_montx
 ___
 }
@@ -2087,6 +2095,7 @@
 .type	ecp_nistz256_select_w5,\@abi-omnipotent
 .align	32
 ecp_nistz256_select_w5:
+.cfi_startproc
 ___
 $code.=<<___	if ($avx>1);
 	leaq	OPENSSL_ia32cap_P(%rip), %rax
@@ -2176,6 +2185,7 @@
 ___
 $code.=<<___;
 	ret
+.cfi_endproc
 .LSEH_end_ecp_nistz256_select_w5:
 .size	ecp_nistz256_select_w5,.-ecp_nistz256_select_w5
 
@@ -2185,6 +2195,7 @@
 .type	ecp_nistz256_select_w7,\@abi-omnipotent
 .align	32
 ecp_nistz256_select_w7:
+.cfi_startproc
 ___
 $code.=<<___	if ($avx>1);
 	leaq	OPENSSL_ia32cap_P(%rip), %rax
@@ -2263,6 +2274,7 @@
 ___
 $code.=<<___;
 	ret
+.cfi_endproc
 .LSEH_end_ecp_nistz256_select_w7:
 .size	ecp_nistz256_select_w7,.-ecp_nistz256_select_w7
 ___
@@ -2279,6 +2291,7 @@
 .type	ecp_nistz256_avx2_select_w5,\@abi-omnipotent
 .align	32
 ecp_nistz256_avx2_select_w5:
+.cfi_startproc
 .Lavx2_select_w5:
 	vzeroupper
 ___
@@ -2366,6 +2379,7 @@
 ___
 $code.=<<___;
 	ret
+.cfi_endproc
 .LSEH_end_ecp_nistz256_avx2_select_w5:
 .size	ecp_nistz256_avx2_select_w5,.-ecp_nistz256_avx2_select_w5
 ___
@@ -2385,6 +2399,7 @@
 .type	ecp_nistz256_avx2_select_w7,\@abi-omnipotent
 .align	32
 ecp_nistz256_avx2_select_w7:
+.cfi_startproc
 .Lavx2_select_w7:
 	vzeroupper
 ___
@@ -2487,6 +2502,7 @@
 ___
 $code.=<<___;
 	ret
+.cfi_endproc
 .LSEH_end_ecp_nistz256_avx2_select_w7:
 .size	ecp_nistz256_avx2_select_w7,.-ecp_nistz256_avx2_select_w7
 ___
@@ -2556,6 +2572,7 @@
 .type	__ecp_nistz256_add_toq,\@abi-omnipotent
 .align	32
 __ecp_nistz256_add_toq:
+.cfi_startproc
 	xor	$t4,$t4
 	add	8*0($b_ptr), $a0
 	adc	8*1($b_ptr), $a1
@@ -2583,11 +2600,13 @@
 	mov	$a3, 8*3($r_ptr)
 
 	ret
+.cfi_endproc
 .size	__ecp_nistz256_add_toq,.-__ecp_nistz256_add_toq
 
 .type	__ecp_nistz256_sub_fromq,\@abi-omnipotent
 .align	32
 __ecp_nistz256_sub_fromq:
+.cfi_startproc
 	sub	8*0($b_ptr), $a0
 	sbb	8*1($b_ptr), $a1
 	 mov	$a0, $t0
@@ -2614,11 +2633,13 @@
 	mov	$a3, 8*3($r_ptr)
 
 	ret
+.cfi_endproc
 .size	__ecp_nistz256_sub_fromq,.-__ecp_nistz256_sub_fromq
 
 .type	__ecp_nistz256_subq,\@abi-omnipotent
 .align	32
 __ecp_nistz256_subq:
+.cfi_startproc
 	sub	$a0, $t0
 	sbb	$a1, $t1
 	 mov	$t0, $a0
@@ -2641,11 +2662,13 @@
 	cmovnz	$t3, $a3
 
 	ret
+.cfi_endproc
 .size	__ecp_nistz256_subq,.-__ecp_nistz256_subq
 
 .type	__ecp_nistz256_mul_by_2q,\@abi-omnipotent
 .align	32
 __ecp_nistz256_mul_by_2q:
+.cfi_startproc
 	xor	$t4, $t4
 	add	$a0, $a0		# a0:a3+a0:a3
 	adc	$a1, $a1
@@ -2673,6 +2696,7 @@
 	mov	$a3, 8*3($r_ptr)
 
 	ret
+.cfi_endproc
 .size	__ecp_nistz256_mul_by_2q,.-__ecp_nistz256_mul_by_2q
 ___
 									}
@@ -3653,6 +3677,7 @@
 .type	__ecp_nistz256_add_tox,\@abi-omnipotent
 .align	32
 __ecp_nistz256_add_tox:
+.cfi_startproc
 	xor	$t4, $t4
 	adc	8*0($b_ptr), $a0
 	adc	8*1($b_ptr), $a1
@@ -3681,11 +3706,13 @@
 	mov	$a3, 8*3($r_ptr)
 
 	ret
+.cfi_endproc
 .size	__ecp_nistz256_add_tox,.-__ecp_nistz256_add_tox
 
 .type	__ecp_nistz256_sub_fromx,\@abi-omnipotent
 .align	32
 __ecp_nistz256_sub_fromx:
+.cfi_startproc
 	xor	$t4, $t4
 	sbb	8*0($b_ptr), $a0
 	sbb	8*1($b_ptr), $a1
@@ -3714,11 +3741,13 @@
 	mov	$a3, 8*3($r_ptr)
 
 	ret
+.cfi_endproc
 .size	__ecp_nistz256_sub_fromx,.-__ecp_nistz256_sub_fromx
 
 .type	__ecp_nistz256_subx,\@abi-omnipotent
 .align	32
 __ecp_nistz256_subx:
+.cfi_startproc
 	xor	$t4, $t4
 	sbb	$a0, $t0
 	sbb	$a1, $t1
@@ -3743,11 +3772,13 @@
 	cmovc	$t3, $a3
 
 	ret
+.cfi_endproc
 .size	__ecp_nistz256_subx,.-__ecp_nistz256_subx
 
 .type	__ecp_nistz256_mul_by_2x,\@abi-omnipotent
 .align	32
 __ecp_nistz256_mul_by_2x:
+.cfi_startproc
 	xor	$t4, $t4
 	adc	$a0, $a0		# a0:a3+a0:a3
 	adc	$a1, $a1
@@ -3776,6 +3807,7 @@
 	mov	$a3, 8*3($r_ptr)
 
 	ret
+.cfi_endproc
 .size	__ecp_nistz256_mul_by_2x,.-__ecp_nistz256_mul_by_2x
 ___
 									}