Sync up AES assembly.

This syncs up with OpenSSL master as of
50ea9d2b3521467a11559be41dcf05ee05feabd6. Aside from the license and
spelling updates, the changes are the CFI (DWARF call frame
information) annotations, which were added upstream in
b84460ad3a3e4fcb22efaa0a8365b826f4264ecf.
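
For reviewers unfamiliar with the perlasm CFI support: .cfi_push,
.cfi_pop, and .cfi_cfa_expression below are not raw assembler
directives but shorthands that x86_64-xlate.pl expands into standard
DWARF CFI. As a rough sketch (assumed expansion, not copied from the
translator), a prologue such as

    asm_AES_set_encrypt_key:
    .cfi_startproc                  # on entry the CFA is %rsp+8
            push    %rbx
    .cfi_push       %rbx            # expands to approximately:
                                    #   .cfi_adjust_cfa_offset  8
                                    #   .cfi_offset     %rbx,-16

records that the push grew the frame by 8 bytes and saved %rbx at
CFA-16. The .cfi_cfa_expression %rsp+24,deref,+8 form emits a DWARF
expression meaning "load the saved stack pointer from 24(%rsp), then
add 8", which keeps unwinding correct after the code repoints %rsp at
the frame it carves out above the key schedule.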

Change-Id: I42280985f834d5b9133eacafc8ff9dbd2f0ea59a
Reviewed-on: https://boringssl-review.googlesource.com/25704
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/fipsmodule/aes/asm/aes-x86_64.pl b/crypto/fipsmodule/aes/asm/aes-x86_64.pl
index 4bf7db3..8b74ef0 100755
--- a/crypto/fipsmodule/aes/asm/aes-x86_64.pl
+++ b/crypto/fipsmodule/aes/asm/aes-x86_64.pl
@@ -1,7 +1,14 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License").  You may not use
+# this file except in compliance with the License.  You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
 #
 # ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 # project. The module is, however, dual licensed under OpenSSL and
 # CRYPTOGAMS licenses depending on where you obtain it. For further
 # details see http://www.openssl.org/~appro/cryptogams/.
@@ -590,13 +597,21 @@
 .type	asm_AES_encrypt,\@function,3
 .hidden	asm_AES_encrypt
 asm_AES_encrypt:
+.cfi_startproc
 	mov	%rsp,%rax
+.cfi_def_cfa_register	%rax
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 
 	# allocate frame "above" key schedule
 	lea	-63(%rdx),%rcx	# %rdx is key argument
@@ -609,6 +624,7 @@
 
 	mov	%rsi,16(%rsp)	# save out
 	mov	%rax,24(%rsp)	# save original stack pointer
+.cfi_cfa_expression	%rsp+24,deref,+8
 .Lenc_prologue:
 
 	mov	%rdx,$key
@@ -635,20 +651,29 @@
 
 	mov	16(%rsp),$out	# restore out
 	mov	24(%rsp),%rsi	# restore saved stack pointer
+.cfi_def_cfa	%rsi,8
 	mov	$s0,0($out)	# write output vector
 	mov	$s1,4($out)
 	mov	$s2,8($out)
 	mov	$s3,12($out)
 
 	mov	-48(%rsi),%r15
+.cfi_restore	%r15
 	mov	-40(%rsi),%r14
+.cfi_restore	%r14
 	mov	-32(%rsi),%r13
+.cfi_restore	%r13
 	mov	-24(%rsi),%r12
+.cfi_restore	%r12
 	mov	-16(%rsi),%rbp
+.cfi_restore	%rbp
 	mov	-8(%rsi),%rbx
+.cfi_restore	%rbx
 	lea	(%rsi),%rsp
+.cfi_def_cfa_register	%rsp
 .Lenc_epilogue:
 	ret
+.cfi_endproc
 .size	asm_AES_encrypt,.-asm_AES_encrypt
 ___
 
@@ -1186,13 +1211,21 @@
 .type	asm_AES_decrypt,\@function,3
 .hidden	asm_AES_decrypt
 asm_AES_decrypt:
+.cfi_startproc
 	mov	%rsp,%rax
+.cfi_def_cfa_register	%rax
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 
 	# allocate frame "above" key schedule
 	lea	-63(%rdx),%rcx	# %rdx is key argument
@@ -1205,6 +1238,7 @@
 
 	mov	%rsi,16(%rsp)	# save out
 	mov	%rax,24(%rsp)	# save original stack pointer
+.cfi_cfa_expression	%rsp+24,deref,+8
 .Ldec_prologue:
 
 	mov	%rdx,$key
@@ -1233,20 +1267,29 @@
 
 	mov	16(%rsp),$out	# restore out
 	mov	24(%rsp),%rsi	# restore saved stack pointer
+.cfi_def_cfa	%rsi,8
 	mov	$s0,0($out)	# write output vector
 	mov	$s1,4($out)
 	mov	$s2,8($out)
 	mov	$s3,12($out)
 
 	mov	-48(%rsi),%r15
+.cfi_restore	%r15
 	mov	-40(%rsi),%r14
+.cfi_restore	%r14
 	mov	-32(%rsi),%r13
+.cfi_restore	%r13
 	mov	-24(%rsi),%r12
+.cfi_restore	%r12
 	mov	-16(%rsi),%rbp
+.cfi_restore	%rbp
 	mov	-8(%rsi),%rbx
+.cfi_restore	%rbx
 	lea	(%rsi),%rsp
+.cfi_def_cfa_register	%rsp
 .Ldec_epilogue:
 	ret
+.cfi_endproc
 .size	asm_AES_decrypt,.-asm_AES_decrypt
 ___
 #------------------------------------------------------------------#
@@ -1284,22 +1327,34 @@
 .globl asm_AES_set_encrypt_key
 .type  asm_AES_set_encrypt_key,\@function,3
 asm_AES_set_encrypt_key:
+.cfi_startproc
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12			# redundant, but allows to share
+.cfi_push	%r12
 	push	%r13			# exception handler...
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 	sub	\$8,%rsp
+.cfi_adjust_cfa_offset	8
 .Lenc_key_prologue:
 
 	call	_x86_64_AES_set_encrypt_key
 
 	mov	40(%rsp),%rbp
+.cfi_restore	%rbp
 	mov	48(%rsp),%rbx
+.cfi_restore	%rbx
 	add	\$56,%rsp
+.cfi_adjust_cfa_offset	-56
 .Lenc_key_epilogue:
 	ret
+.cfi_endproc
 .size asm_AES_set_encrypt_key,.-asm_AES_set_encrypt_key
 
 .type	_x86_64_AES_set_encrypt_key,\@abi-omnipotent
@@ -1549,13 +1604,21 @@
 .globl asm_AES_set_decrypt_key
 .type  asm_AES_set_decrypt_key,\@function,3
 asm_AES_set_decrypt_key:
+.cfi_startproc
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 	push	%rdx			# save key schedule
+.cfi_adjust_cfa_offset	8
 .Ldec_key_prologue:
 
 	call	_x86_64_AES_set_encrypt_key
@@ -1609,14 +1672,22 @@
 	xor	%rax,%rax
 .Labort:
 	mov	8(%rsp),%r15
+.cfi_restore	%r15
 	mov	16(%rsp),%r14
+.cfi_restore	%r14
 	mov	24(%rsp),%r13
+.cfi_restore	%r13
 	mov	32(%rsp),%r12
+.cfi_restore	%r12
 	mov	40(%rsp),%rbp
+.cfi_restore	%rbp
 	mov	48(%rsp),%rbx
+.cfi_restore	%rbx
 	add	\$56,%rsp
+.cfi_adjust_cfa_offset	-56
 .Ldec_key_epilogue:
 	ret
+.cfi_endproc
 .size	asm_AES_set_decrypt_key,.-asm_AES_set_decrypt_key
 ___
 
@@ -1645,15 +1716,23 @@
 .extern	OPENSSL_ia32cap_P
 .hidden	asm_AES_cbc_encrypt
 asm_AES_cbc_encrypt:
+.cfi_startproc
 	cmp	\$0,%rdx	# check length
 	je	.Lcbc_epilogue
 	pushfq
+.cfi_push	49		# %rflags
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 .Lcbc_prologue:
 
 	cld
@@ -1699,8 +1778,10 @@
 .Lcbc_te_ok:
 
 	xchg	%rsp,$key
+.cfi_def_cfa_register	$key
 	#add	\$8,%rsp	# reserve for return address!
 	mov	$key,$_rsp	# save %rsp
+.cfi_cfa_expression	$_rsp,deref,+64
 .Lcbc_fast_body:
 	mov	%rdi,$_inp	# save copy of inp
 	mov	%rsi,$_out	# save copy of out
@@ -1930,7 +2011,7 @@
 	lea	($key,%rax),%rax
 	mov	%rax,$keyend
 
-	# pick Te4 copy which can't "overlap" with stack frame or key scdedule
+	# pick Te4 copy which can't "overlap" with stack frame or key schedule
 	lea	2048($sbox),$sbox
 	lea	768-8(%rsp),%rax
 	sub	$sbox,%rax
@@ -2082,17 +2163,27 @@
 .align	16
 .Lcbc_exit:
 	mov	$_rsp,%rsi
+.cfi_def_cfa	%rsi,64
 	mov	(%rsi),%r15
+.cfi_restore	%r15
 	mov	8(%rsi),%r14
+.cfi_restore	%r14
 	mov	16(%rsi),%r13
+.cfi_restore	%r13
 	mov	24(%rsi),%r12
+.cfi_restore	%r12
 	mov	32(%rsi),%rbp
+.cfi_restore	%rbp
 	mov	40(%rsi),%rbx
+.cfi_restore	%rbx
 	lea	48(%rsi),%rsp
+.cfi_def_cfa	%rsp,16
 .Lcbc_popfq:
 	popfq
+.cfi_pop	49		# %rflags
 .Lcbc_epilogue:
 	ret
+.cfi_endproc
 .size	asm_AES_cbc_encrypt,.-asm_AES_cbc_encrypt
 ___
 }
diff --git a/crypto/fipsmodule/aes/asm/aesni-x86_64.pl b/crypto/fipsmodule/aes/asm/aesni-x86_64.pl
index a9b3151..a186941 100644
--- a/crypto/fipsmodule/aes/asm/aesni-x86_64.pl
+++ b/crypto/fipsmodule/aes/asm/aesni-x86_64.pl
@@ -60,7 +60,7 @@
 # identical to CBC, because CBC-MAC is essentially CBC encrypt without
 # saving output. CCM CTR "stays invisible," because it's neatly
 # interleaved wih CBC-MAC. This provides ~30% improvement over
-# "straghtforward" CCM implementation with CTR and CBC-MAC performed
+# "straightforward" CCM implementation with CTR and CBC-MAC performed
 # disjointly. Parallelizable modes practically achieve the theoretical
 # limit.
 #
@@ -143,14 +143,14 @@
 # asymptotic, if it can be surpassed, isn't it? What happens there?
 # Rewind to CBC paragraph for the answer. Yes, out-of-order execution
 # magic is responsible for this. Processor overlaps not only the
-# additional instructions with AES ones, but even AES instuctions
+# additional instructions with AES ones, but even AES instructions
 # processing adjacent triplets of independent blocks. In the 6x case
 # additional instructions  still claim disproportionally small amount
 # of additional cycles, but in 8x case number of instructions must be
 # a tad too high for out-of-order logic to cope with, and AES unit
 # remains underutilized... As you can see 8x interleave is hardly
 # justifiable, so there no need to feel bad that 32-bit aesni-x86.pl
-# utilizies 6x interleave because of limited register bank capacity.
+# utilizes 6x interleave because of limited register bank capacity.
 #
 # Higher interleave factors do have negative impact on Westmere
 # performance. While for ECB mode it's negligible ~1.5%, other
@@ -1182,6 +1182,7 @@
 .type	aesni_ctr32_encrypt_blocks,\@function,5
 .align	16
 aesni_ctr32_encrypt_blocks:
+.cfi_startproc
 	cmp	\$1,$len
 	jne	.Lctr32_bulk
 
@@ -1204,7 +1205,9 @@
 .align	16
 .Lctr32_bulk:
 	lea	(%rsp),$key_			# use $key_ as frame pointer
+.cfi_def_cfa_register	$key_
 	push	%rbp
+.cfi_push	%rbp
 	sub	\$$frame_size,%rsp
 	and	\$-16,%rsp	# Linux kernel stack can be incorrectly seeded
 ___
@@ -1548,7 +1551,7 @@
 	sub	\$8,$len
 	jnc	.Lctr32_loop8			# loop if $len-=8 didn't borrow
 
-	add	\$8,$len			# restore real remainig $len
+	add	\$8,$len			# restore real remaining $len
 	jz	.Lctr32_done			# done if ($len==0)
 	lea	-0x80($key),$key
 
@@ -1665,7 +1668,7 @@
 	movups	$inout2,0x20($out)		# $len was 3, stop store
 
 .Lctr32_done:
-	xorps	%xmm0,%xmm0			# clear regiser bank
+	xorps	%xmm0,%xmm0			# clear register bank
 	xor	$key0,$key0
 	pxor	%xmm1,%xmm1
 	pxor	%xmm2,%xmm2
@@ -1725,9 +1728,12 @@
 ___
 $code.=<<___;
 	mov	-8($key_),%rbp
+.cfi_restore	%rbp
 	lea	($key_),%rsp
+.cfi_def_cfa_register	%rsp
 .Lctr32_epilogue:
 	ret
+.cfi_endproc
 .size	aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks
 ___
 }
@@ -1749,8 +1755,11 @@
 .type	aesni_xts_encrypt,\@function,6
 .align	16
 aesni_xts_encrypt:
+.cfi_startproc
 	lea	(%rsp),%r11			# frame pointer
+.cfi_def_cfa_register	%r11
 	push	%rbp
+.cfi_push	%rbp
 	sub	\$$frame_size,%rsp
 	and	\$-16,%rsp	# Linux kernel stack can be incorrectly seeded
 ___
@@ -1848,7 +1857,7 @@
 	lea	`16*6`($inp),$inp
 	pxor	$twmask,$inout5
 
-	 pxor	$twres,@tweak[0]		# calclulate tweaks^round[last]
+	 pxor	$twres,@tweak[0]		# calculate tweaks^round[last]
 	aesenc		$rndkey1,$inout4
 	 pxor	$twres,@tweak[1]
 	 movdqa	@tweak[0],`16*0`(%rsp)		# put aside tweaks^round[last]
@@ -2215,9 +2224,12 @@
 ___
 $code.=<<___;
 	mov	-8(%r11),%rbp
+.cfi_restore	%rbp
 	lea	(%r11),%rsp
+.cfi_def_cfa_register	%rsp
 .Lxts_enc_epilogue:
 	ret
+.cfi_endproc
 .size	aesni_xts_encrypt,.-aesni_xts_encrypt
 ___
 
@@ -2226,8 +2238,11 @@
 .type	aesni_xts_decrypt,\@function,6
 .align	16
 aesni_xts_decrypt:
+.cfi_startproc
 	lea	(%rsp),%r11			# frame pointer
+.cfi_def_cfa_register	%r11
 	push	%rbp
+.cfi_push	%rbp
 	sub	\$$frame_size,%rsp
 	and	\$-16,%rsp	# Linux kernel stack can be incorrectly seeded
 ___
@@ -2328,7 +2343,7 @@
 	lea	`16*6`($inp),$inp
 	pxor	$twmask,$inout5
 
-	 pxor	$twres,@tweak[0]		# calclulate tweaks^round[last]
+	 pxor	$twres,@tweak[0]		# calculate tweaks^round[last]
 	aesdec		$rndkey1,$inout4
 	 pxor	$twres,@tweak[1]
 	 movdqa	@tweak[0],`16*0`(%rsp)		# put aside tweaks^last round key
@@ -2718,9 +2733,12 @@
 ___
 $code.=<<___;
 	mov	-8(%r11),%rbp
+.cfi_restore	%rbp
 	lea	(%r11),%rsp
+.cfi_def_cfa_register	%rsp
 .Lxts_dec_epilogue:
 	ret
+.cfi_endproc
 .size	aesni_xts_decrypt,.-aesni_xts_decrypt
 ___
 }
@@ -2745,12 +2763,18 @@
 .type	aesni_ocb_encrypt,\@function,6
 .align	32
 aesni_ocb_encrypt:
+.cfi_startproc
 	lea	(%rsp),%rax
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 ___
 $code.=<<___ if ($win64);
 	lea	-0xa0(%rsp),%rsp
@@ -2945,6 +2969,7 @@
 	pxor	%xmm14,%xmm14
 	pxor	%xmm15,%xmm15
 	lea	0x28(%rsp),%rax
+.cfi_def_cfa	%rax,8
 ___
 $code.=<<___ if ($win64);
 	movaps	0x00(%rsp),%xmm6
@@ -2972,13 +2997,20 @@
 ___
 $code.=<<___;
 	mov	-40(%rax),%r14
+.cfi_restore	%r14
 	mov	-32(%rax),%r13
+.cfi_restore	%r13
 	mov	-24(%rax),%r12
+.cfi_restore	%r12
 	mov	-16(%rax),%rbp
+.cfi_restore	%rbp
 	mov	-8(%rax),%rbx
+.cfi_restore	%rbx
 	lea	(%rax),%rsp
+.cfi_def_cfa_register	%rsp
 .Locb_enc_epilogue:
 	ret
+.cfi_endproc
 .size	aesni_ocb_encrypt,.-aesni_ocb_encrypt
 
 .type	__ocb_encrypt6,\@abi-omnipotent
@@ -3191,12 +3223,18 @@
 .type	aesni_ocb_decrypt,\@function,6
 .align	32
 aesni_ocb_decrypt:
+.cfi_startproc
 	lea	(%rsp),%rax
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 ___
 $code.=<<___ if ($win64);
 	lea	-0xa0(%rsp),%rsp
@@ -3413,6 +3451,7 @@
 	pxor	%xmm14,%xmm14
 	pxor	%xmm15,%xmm15
 	lea	0x28(%rsp),%rax
+.cfi_def_cfa	%rax,8
 ___
 $code.=<<___ if ($win64);
 	movaps	0x00(%rsp),%xmm6
@@ -3440,13 +3479,20 @@
 ___
 $code.=<<___;
 	mov	-40(%rax),%r14
+.cfi_restore	%r14
 	mov	-32(%rax),%r13
+.cfi_restore	%r13
 	mov	-24(%rax),%r12
+.cfi_restore	%r12
 	mov	-16(%rax),%rbp
+.cfi_restore	%rbp
 	mov	-8(%rax),%rbx
+.cfi_restore	%rbx
 	lea	(%rax),%rsp
+.cfi_def_cfa_register	%rsp
 .Locb_dec_epilogue:
 	ret
+.cfi_endproc
 .size	aesni_ocb_decrypt,.-aesni_ocb_decrypt
 
 .type	__ocb_decrypt6,\@abi-omnipotent
@@ -3659,6 +3705,7 @@
 .type	${PREFIX}_cbc_encrypt,\@function,6
 .align	16
 ${PREFIX}_cbc_encrypt:
+.cfi_startproc
 	test	$len,$len		# check length
 	jz	.Lcbc_ret
 
@@ -3735,7 +3782,9 @@
 .align	16
 .Lcbc_decrypt_bulk:
 	lea	(%rsp),%r11		# frame pointer
+.cfi_def_cfa_register	%r11
 	push	%rbp
+.cfi_push	%rbp
 	sub	\$$frame_size,%rsp
 	and	\$-16,%rsp	# Linux kernel stack can be incorrectly seeded
 ___
@@ -4179,9 +4228,12 @@
 ___
 $code.=<<___;
 	mov	-8(%r11),%rbp
+.cfi_restore	%rbp
 	lea	(%r11),%rsp
+.cfi_def_cfa_register	%rsp
 .Lcbc_ret:
 	ret
+.cfi_endproc
 .size	${PREFIX}_cbc_encrypt,.-${PREFIX}_cbc_encrypt
 ___
 } 
@@ -4202,7 +4254,9 @@
 .type	${PREFIX}_set_decrypt_key,\@abi-omnipotent
 .align	16
 ${PREFIX}_set_decrypt_key:
+.cfi_startproc
 	.byte	0x48,0x83,0xEC,0x08	# sub rsp,8
+.cfi_adjust_cfa_offset	8
 	call	__aesni_set_encrypt_key
 	shl	\$4,$bits		# rounds-1 after _aesni_set_encrypt_key
 	test	%eax,%eax
@@ -4235,15 +4289,16 @@
 	pxor	%xmm0,%xmm0
 .Ldec_key_ret:
 	add	\$8,%rsp
+.cfi_adjust_cfa_offset	-8
 	ret
+.cfi_endproc
 .LSEH_end_set_decrypt_key:
 .size	${PREFIX}_set_decrypt_key,.-${PREFIX}_set_decrypt_key
 ___
 
-# This is based on submission by
-#
-#	Huang Ying <ying.huang@intel.com>
-#	Vinodh Gopal <vinodh.gopal@intel.com>
+# This is based on submission from Intel by
+#	Huang Ying
+#	Vinodh Gopal
 #	Kahraman Akdemir
 #
 # Aggressively optimized in respect to aeskeygenassist's critical path
@@ -4271,7 +4326,9 @@
 .align	16
 ${PREFIX}_set_encrypt_key:
 __aesni_set_encrypt_key:
+.cfi_startproc
 	.byte	0x48,0x83,0xEC,0x08	# sub rsp,8
+.cfi_adjust_cfa_offset	8
 	mov	\$-1,%rax
 	test	$inp,$inp
 	jz	.Lenc_key_ret
@@ -4461,7 +4518,7 @@
 
 .align	16
 .L14rounds:
-	movups	16($inp),%xmm2			# remaning half of *userKey
+	movups	16($inp),%xmm2			# remaining half of *userKey
 	mov	\$13,$bits			# 14 rounds for 256
 	lea	16(%rax),%rax
 	cmp	\$`1<<28`,%r10d			# AVX, but no XOP
@@ -4565,7 +4622,9 @@
 	pxor	%xmm4,%xmm4
 	pxor	%xmm5,%xmm5
 	add	\$8,%rsp
+.cfi_adjust_cfa_offset	-8
 	ret
+.cfi_endproc
 .LSEH_end_set_encrypt_key:
 
 .align	16
diff --git a/crypto/fipsmodule/aes/asm/aesv8-armx.pl b/crypto/fipsmodule/aes/asm/aesv8-armx.pl
index 662814a..2fc616e 100644
--- a/crypto/fipsmodule/aes/asm/aesv8-armx.pl
+++ b/crypto/fipsmodule/aes/asm/aesv8-armx.pl
@@ -929,7 +929,7 @@
 	s/^(\s+)v/$1/o		or	# strip off v prefix
 	s/\bbx\s+lr\b/ret/o;
 
-	# fix up remainig legacy suffixes
+	# fix up remaining legacy suffixes
 	s/\.[ui]?8//o;
 	m/\],#8/o and s/\.16b/\.8b/go;
 	s/\.[ui]?32//o and s/\.16b/\.4s/go;
@@ -988,7 +988,7 @@
 	s/\bv([0-9])\.[12468]+[bsd]\b/q$1/go;	# new->old registers
 	s/\/\/\s?/@ /o;				# new->old style commentary
 
-	# fix up remainig new-style suffixes
+	# fix up remaining new-style suffixes
 	s/\{q([0-9]+)\},\s*\[(.+)\],#8/sprintf "{d%d},[$2]!",2*$1/eo	or
 	s/\],#[0-9]+/]!/o;
 
diff --git a/crypto/fipsmodule/aes/asm/bsaes-x86_64.pl b/crypto/fipsmodule/aes/asm/bsaes-x86_64.pl
index 8d9ee6b..607f8e7 100644
--- a/crypto/fipsmodule/aes/asm/bsaes-x86_64.pl
+++ b/crypto/fipsmodule/aes/asm/bsaes-x86_64.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License").  You may not use
+# this file except in compliance with the License.  You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
 
 ###################################################################
 ### AES-128 [originally in CTR mode]				###
@@ -1158,15 +1165,23 @@
 .type	bsaes_ecb_encrypt_blocks,\@abi-omnipotent
 .align	16
 bsaes_ecb_encrypt_blocks:
+.cfi_startproc
 	mov	%rsp, %rax
 .Lecb_enc_prologue:
 	push	%rbp
+.cfi_push	%rbp
 	push	%rbx
+.cfi_push	%rbx
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 	lea	-0x48(%rsp),%rsp
+.cfi_adjust_cfa_offset	0x48
 ___
 $code.=<<___ if ($win64);
 	lea	-0xa0(%rsp), %rsp
@@ -1184,6 +1199,7 @@
 ___
 $code.=<<___;
 	mov	%rsp,%rbp		# backup %rsp
+.cfi_def_cfa_register	%rbp
 	mov	240($arg4),%eax		# rounds
 	mov	$arg1,$inp		# backup arguments
 	mov	$arg2,$out
@@ -1328,6 +1344,7 @@
 	jb	.Lecb_enc_bzero
 
 	lea	0x78(%rbp),%rax
+.cfi_def_cfa	%rax,8
 ___
 $code.=<<___ if ($win64);
 	movaps	0x40(%rbp), %xmm6
@@ -1345,29 +1362,45 @@
 ___
 $code.=<<___;
 	mov	-48(%rax), %r15
+.cfi_restore	%r15
 	mov	-40(%rax), %r14
+.cfi_restore	%r14
 	mov	-32(%rax), %r13
+.cfi_restore	%r13
 	mov	-24(%rax), %r12
+.cfi_restore	%r12
 	mov	-16(%rax), %rbx
+.cfi_restore	%rbx
 	mov	-8(%rax), %rbp
+.cfi_restore	%rbp
 	lea	(%rax), %rsp		# restore %rsp
+.cfi_def_cfa_register	%rsp
 .Lecb_enc_epilogue:
 	ret
+.cfi_endproc
 .size	bsaes_ecb_encrypt_blocks,.-bsaes_ecb_encrypt_blocks
 
 .globl	bsaes_ecb_decrypt_blocks
 .type	bsaes_ecb_decrypt_blocks,\@abi-omnipotent
 .align	16
 bsaes_ecb_decrypt_blocks:
+.cfi_startproc
 	mov	%rsp, %rax
 .Lecb_dec_prologue:
 	push	%rbp
+.cfi_push	%rbp
 	push	%rbx
+.cfi_push	%rbx
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 	lea	-0x48(%rsp),%rsp
+.cfi_adjust_cfa_offset	0x48
 ___
 $code.=<<___ if ($win64);
 	lea	-0xa0(%rsp), %rsp
@@ -1385,6 +1418,7 @@
 ___
 $code.=<<___;
 	mov	%rsp,%rbp		# backup %rsp
+.cfi_def_cfa_register	%rbp
 	mov	240($arg4),%eax		# rounds
 	mov	$arg1,$inp		# backup arguments
 	mov	$arg2,$out
@@ -1530,6 +1564,7 @@
 	jb	.Lecb_dec_bzero
 
 	lea	0x78(%rbp),%rax
+.cfi_def_cfa	%rax,8
 ___
 $code.=<<___ if ($win64);
 	movaps	0x40(%rbp), %xmm6
@@ -1547,14 +1582,22 @@
 ___
 $code.=<<___;
 	mov	-48(%rax), %r15
+.cfi_restore	%r15
 	mov	-40(%rax), %r14
+.cfi_restore	%r14
 	mov	-32(%rax), %r13
+.cfi_restore	%r13
 	mov	-24(%rax), %r12
+.cfi_restore	%r12
 	mov	-16(%rax), %rbx
+.cfi_restore	%rbx
 	mov	-8(%rax), %rbp
+.cfi_restore	%rbp
 	lea	(%rax), %rsp		# restore %rsp
+.cfi_def_cfa_register	%rsp
 .Lecb_dec_epilogue:
 	ret
+.cfi_endproc
 .size	bsaes_ecb_decrypt_blocks,.-bsaes_ecb_decrypt_blocks
 ___
 }
@@ -1564,6 +1607,7 @@
 .type	bsaes_cbc_encrypt,\@abi-omnipotent
 .align	16
 bsaes_cbc_encrypt:
+.cfi_startproc
 ___
 $code.=<<___ if ($win64);
 	mov	48(%rsp),$arg6		# pull direction flag
@@ -1577,12 +1621,19 @@
 	mov	%rsp, %rax
 .Lcbc_dec_prologue:
 	push	%rbp
+.cfi_push	%rbp
 	push	%rbx
+.cfi_push	%rbx
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 	lea	-0x48(%rsp), %rsp
+.cfi_adjust_cfa_offset	0x48
 ___
 $code.=<<___ if ($win64);
 	mov	0xa0(%rsp),$arg5	# pull ivp
@@ -1601,6 +1652,7 @@
 ___
 $code.=<<___;
 	mov	%rsp, %rbp		# backup %rsp
+.cfi_def_cfa_register	%rbp
 	mov	240($arg4), %eax	# rounds
 	mov	$arg1, $inp		# backup arguments
 	mov	$arg2, $out
@@ -1820,6 +1872,7 @@
 	ja	.Lcbc_dec_bzero
 
 	lea	0x78(%rbp),%rax
+.cfi_def_cfa	%rax,8
 ___
 $code.=<<___ if ($win64);
 	movaps	0x40(%rbp), %xmm6
@@ -1837,29 +1890,45 @@
 ___
 $code.=<<___;
 	mov	-48(%rax), %r15
+.cfi_restore	%r15
 	mov	-40(%rax), %r14
+.cfi_restore	%r14
 	mov	-32(%rax), %r13
+.cfi_restore	%r13
 	mov	-24(%rax), %r12
+.cfi_restore	%r12
 	mov	-16(%rax), %rbx
+.cfi_restore	%rbx
 	mov	-8(%rax), %rbp
+.cfi_restore	%rbp
 	lea	(%rax), %rsp		# restore %rsp
+.cfi_def_cfa_register	%rsp
 .Lcbc_dec_epilogue:
 	ret
+.cfi_endproc
 .size	bsaes_cbc_encrypt,.-bsaes_cbc_encrypt
 
 .globl	bsaes_ctr32_encrypt_blocks
 .type	bsaes_ctr32_encrypt_blocks,\@abi-omnipotent
 .align	16
 bsaes_ctr32_encrypt_blocks:
+.cfi_startproc
 	mov	%rsp, %rax
 .Lctr_enc_prologue:
 	push	%rbp
+.cfi_push	%rbp
 	push	%rbx
+.cfi_push	%rbx
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 	lea	-0x48(%rsp), %rsp
+.cfi_adjust_cfa_offset	0x48
 ___
 $code.=<<___ if ($win64);
 	mov	0xa0(%rsp),$arg5	# pull ivp
@@ -1878,6 +1947,7 @@
 ___
 $code.=<<___;
 	mov	%rsp, %rbp		# backup %rsp
+.cfi_def_cfa_register	%rbp
 	movdqu	($arg5), %xmm0		# load counter
 	mov	240($arg4), %eax	# rounds
 	mov	$arg1, $inp		# backup arguments
@@ -2052,6 +2122,7 @@
 	ja	.Lctr_enc_bzero
 
 	lea	0x78(%rbp),%rax
+.cfi_def_cfa	%rax,8
 ___
 $code.=<<___ if ($win64);
 	movaps	0x40(%rbp), %xmm6
@@ -2069,14 +2140,22 @@
 ___
 $code.=<<___;
 	mov	-48(%rax), %r15
+.cfi_restore	%r15
 	mov	-40(%rax), %r14
+.cfi_restore	%r14
 	mov	-32(%rax), %r13
+.cfi_restore	%r13
 	mov	-24(%rax), %r12
+.cfi_restore	%r12
 	mov	-16(%rax), %rbx
+.cfi_restore	%rbx
 	mov	-8(%rax), %rbp
+.cfi_restore	%rbp
 	lea	(%rax), %rsp		# restore %rsp
+.cfi_def_cfa_register	%rsp
 .Lctr_enc_epilogue:
 	ret
+.cfi_endproc
 .size	bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks
 ___
 ######################################################################
@@ -2092,15 +2171,23 @@
 .type	bsaes_xts_encrypt,\@abi-omnipotent
 .align	16
 bsaes_xts_encrypt:
+.cfi_startproc
 	mov	%rsp, %rax
 .Lxts_enc_prologue:
 	push	%rbp
+.cfi_push	%rbp
 	push	%rbx
+.cfi_push	%rbx
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 	lea	-0x48(%rsp), %rsp
+.cfi_adjust_cfa_offset	0x48
 ___
 $code.=<<___ if ($win64);
 	mov	0xa0(%rsp),$arg5	# pull key2
@@ -2120,6 +2207,7 @@
 ___
 $code.=<<___;
 	mov	%rsp, %rbp		# backup %rsp
+.cfi_def_cfa_register	%rbp
 	mov	$arg1, $inp		# backup arguments
 	mov	$arg2, $out
 	mov	$arg3, $len
@@ -2442,6 +2530,7 @@
 	ja	.Lxts_enc_bzero
 
 	lea	0x78(%rbp),%rax
+.cfi_def_cfa	%rax,8
 ___
 $code.=<<___ if ($win64);
 	movaps	0x40(%rbp), %xmm6
@@ -2459,29 +2548,45 @@
 ___
 $code.=<<___;
 	mov	-48(%rax), %r15
+.cfi_restore	%r15
 	mov	-40(%rax), %r14
+.cfi_restore	%r14
 	mov	-32(%rax), %r13
+.cfi_restore	%r13
 	mov	-24(%rax), %r12
+.cfi_restore	%r12
 	mov	-16(%rax), %rbx
+.cfi_restore	%rbx
 	mov	-8(%rax), %rbp
+.cfi_restore	%rbp
 	lea	(%rax), %rsp		# restore %rsp
+.cfi_def_cfa_register	%rsp
 .Lxts_enc_epilogue:
 	ret
+.cfi_endproc
 .size	bsaes_xts_encrypt,.-bsaes_xts_encrypt
 
 .globl	bsaes_xts_decrypt
 .type	bsaes_xts_decrypt,\@abi-omnipotent
 .align	16
 bsaes_xts_decrypt:
+.cfi_startproc
 	mov	%rsp, %rax
 .Lxts_dec_prologue:
 	push	%rbp
+.cfi_push	%rbp
 	push	%rbx
+.cfi_push	%rbx
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 	lea	-0x48(%rsp), %rsp
+.cfi_adjust_cfa_offset	0x48
 ___
 $code.=<<___ if ($win64);
 	mov	0xa0(%rsp),$arg5	# pull key2
@@ -2849,6 +2954,7 @@
 	ja	.Lxts_dec_bzero
 
 	lea	0x78(%rbp),%rax
+.cfi_def_cfa	%rax,8
 ___
 $code.=<<___ if ($win64);
 	movaps	0x40(%rbp), %xmm6
@@ -2866,14 +2972,22 @@
 ___
 $code.=<<___;
 	mov	-48(%rax), %r15
+.cfi_restore	%r15
 	mov	-40(%rax), %r14
+.cfi_restore	%r14
 	mov	-32(%rax), %r13
+.cfi_restore	%r13
 	mov	-24(%rax), %r12
+.cfi_restore	%r12
 	mov	-16(%rax), %rbx
+.cfi_restore	%rbx
 	mov	-8(%rax), %rbp
+.cfi_restore	%rbp
 	lea	(%rax), %rsp		# restore %rsp
+.cfi_def_cfa_register	%rsp
 .Lxts_dec_epilogue:
 	ret
+.cfi_endproc
 .size	bsaes_xts_decrypt,.-bsaes_xts_decrypt
 ___
 }