Put arm/aarch64 assembly functions in their own section.

This change causes each global arm or aarch64 asm function to be put
into its own section by default. This matches the behaviour of the
-ffunction-sections option to GCC and allows the --gc-sections option to
the linker to discard unused asm functions on a function-by-function
basis.

Sometimes several asm functions will share the same data and, in that
situation, the data is put into the section of one of the functions and
the other function is placed in that same section by means of the newly
added “.global_with_section” directive.

Change-Id: I12c9b844d48d104d28beb816764358551eac4456
Reviewed-on: https://boringssl-review.googlesource.com/6003
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/aes/asm/aes-armv4.pl b/crypto/aes/asm/aes-armv4.pl
index 882017a..16ff990 100644
--- a/crypto/aes/asm/aes-armv4.pl
+++ b/crypto/aes/asm/aes-armv4.pl
@@ -82,6 +82,7 @@
 # endif
 #endif
 
+.pushsection .text.asm_AES_encrypt,"ax",%progbits
 .type	AES_Te,%object
 .align	5
 AES_Te:
@@ -187,6 +188,7 @@
 .word	0x10000000, 0x20000000, 0x40000000, 0x80000000
 .word	0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
 .size	AES_Te,.-AES_Te
+.popsection
 
 @ void asm_AES_encrypt(const unsigned char *in, unsigned char *out,
 @ 		       const AES_KEY *key) {
@@ -440,7 +442,7 @@
 	ldr	pc,[sp],#4		@ pop and return
 .size	_armv4_AES_encrypt,.-_armv4_AES_encrypt
 
-.global asm_AES_set_encrypt_key
+.global_with_section asm_AES_set_encrypt_key, asm_AES_encrypt
 .hidden asm_AES_set_encrypt_key
 .type   asm_AES_set_encrypt_key,%function
 .align	5
@@ -866,6 +868,7 @@
 #endif
 .size	AES_set_enc2dec_key,.-AES_set_enc2dec_key
 
+.pushsection .text.asm_AES_decrypt,"ax",%progbits
 .type	AES_Td,%object
 .align	5
 AES_Td:
@@ -967,6 +970,7 @@
 .byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
 .byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
 .size	AES_Td,.-AES_Td
+.popsection
 
 @ void asm_AES_decrypt(const unsigned char *in, unsigned char *out,
 @ 		       const AES_KEY *key) {
diff --git a/crypto/aes/asm/aesv8-armx.pl b/crypto/aes/asm/aesv8-armx.pl
index 121154a..d770fa3 100644
--- a/crypto/aes/asm/aesv8-armx.pl
+++ b/crypto/aes/asm/aesv8-armx.pl
@@ -72,10 +72,12 @@
 
 $code.=<<___;
 .align	5
+.pushsection .text.${prefix}_set_encrypt_key,"ax",%progbits
 .Lrcon:
 .long	0x01,0x01,0x01,0x01
 .long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
 .long	0x1b,0x1b,0x1b,0x1b
+.popsection
 
 .globl	${prefix}_set_encrypt_key
 .type	${prefix}_set_encrypt_key,%function
diff --git a/crypto/aes/asm/bsaes-armv7.pl b/crypto/aes/asm/bsaes-armv7.pl
index 2617fef..5fa3005 100644
--- a/crypto/aes/asm/bsaes-armv7.pl
+++ b/crypto/aes/asm/bsaes-armv7.pl
@@ -2049,7 +2049,7 @@
 
 .size	bsaes_xts_encrypt,.-bsaes_xts_encrypt
 
-.globl	bsaes_xts_decrypt
+.global_with_section	bsaes_xts_decrypt, bsaes_xts_encrypt
 .hidden	bsaes_xts_decrypt
 .type	bsaes_xts_decrypt,%function
 .align	4
diff --git a/crypto/bn/asm/armv4-mont.pl b/crypto/bn/asm/armv4-mont.pl
index 4206fd8..c464a5b 100644
--- a/crypto/bn/asm/armv4-mont.pl
+++ b/crypto/bn/asm/armv4-mont.pl
@@ -86,8 +86,10 @@
 
 #if __ARM_MAX_ARCH__>=7
 .align	5
+.pushsection .text.bn_mul_mont,"ax",%progbits
 .LOPENSSL_armcap:
 .word	OPENSSL_armcap_P-.Lbn_mul_mont
+.popsection
 #endif
 
 .global	bn_mul_mont
diff --git a/crypto/perlasm/arm-xlate.pl b/crypto/perlasm/arm-xlate.pl
index 706fa70..32eeb25 100755
--- a/crypto/perlasm/arm-xlate.pl
+++ b/crypto/perlasm/arm-xlate.pl
@@ -18,6 +18,14 @@
     if ($flavour =~ /linux/)	{ ".arch\t".join(',',@_); }
     else			{ ""; }
 };
+my $pushsection = sub {	# Builds a .pushsection directive from the given operands.
+    if ($flavour !~ /ios/)	{ ".pushsection\t".join(',',@_); }	# operands are re-joined with ','
+    else			{ ""; }	# iOS flavours: yield an empty string instead
+};
+my $popsection = sub {	# Counterpart of $pushsection; takes no operands.
+    if ($flavour !~ /ios/)	{ ".popsection"; }	# returned as text, not printed
+    else			{ ""; }	# iOS flavours: yield an empty string instead
+};
 my $fpu = sub {
     if ($flavour =~ /linux/)	{ ".fpu\t".join(',',@_); }
     else			{ ""; }
@@ -44,7 +52,8 @@
     $$global = $name;
     $ret;
 };
-my $globl = sub {
+my $in_section = 0;
+my $raw_globl = sub {
     my $name = shift;
     my $global = \$GLOBALS{$name};
     my $ret;
@@ -59,7 +68,39 @@
     $$global = $name;
     $ret;
 };
+my $globl = sub {	# Opens a per-symbol .text.<name> section, then defers to $raw_globl.
+    my $name = shift;
+
+    if ($flavour !~ /ios/) {	# section directives are printed only for non-iOS flavours
+	if ($in_section == 1) {
+	  printf ".popsection\n";	# close the section opened by the previous global
+	}
+	$in_section = 1;	# remember that a section is now open
+
+	printf ".pushsection .text.$name,\"ax\",%%progbits\n";	# %% yields a literal % under printf
+    }
+    return $raw_globl->($name);	# $raw_globl is the handler previously named $globl
+
+};
 my $global = $globl;
+my $global_with_section = sub {	# Handles ".global_with_section NAME, OWNER".
+    my $arg = shift;	# the whole operand string, e.g. "NAME, OWNER"
+    # Copy the captures at once: a later successful match (/ios/) resets $1/$2.
+    if (my ($name, $section) = $arg =~ m/^([^,]*), *(.*)/) {
+	if ($flavour !~ /ios/) {	# section directives are printed only for non-iOS flavours
+	    if ($in_section == 1) {
+	      printf ".popsection\n";	# close the section opened by the previous global
+	    }
+	    $in_section = 1;
+	    # Put NAME into OWNER's section (.text.OWNER) rather than one of its own.
+	    printf ".pushsection .text.$section,\"ax\",%%progbits\n";
+	}
+	return $raw_globl->($name);	# hand NAME to the plain global handler
+    } else {	# operand string did not have the "NAME, OWNER" shape
+	printf STDERR "Expected two arguments to global_with_section\n";
+	exit(1);
+    }
+};
 my $extern = sub {
     &$globl(@_);
     return;	# return nothing
diff --git a/crypto/sha/asm/sha256-armv4.pl b/crypto/sha/asm/sha256-armv4.pl
index 7e07147..b293f84 100644
--- a/crypto/sha/asm/sha256-armv4.pl
+++ b/crypto/sha/asm/sha256-armv4.pl
@@ -187,6 +187,7 @@
 # endif
 #endif
 
+.pushsection .text.sha256_block_data_order,"ax",%progbits
 .type	K256,%object
 .align	5
 K256:
@@ -212,6 +213,7 @@
 .LOPENSSL_armcap:
 .word	OPENSSL_armcap_P-.Lsha256_block_data_order
 #endif
+.popsection
 .align	5
 
 .global	sha256_block_data_order
@@ -471,7 +473,7 @@
 .arch	armv7-a
 .fpu	neon
 
-.global	sha256_block_data_order_neon
+.global_with_section	sha256_block_data_order_neon, sha256_block_data_order
 .type	sha256_block_data_order_neon,%function
 .align	4
 sha256_block_data_order_neon:
diff --git a/crypto/sha/asm/sha512-armv4.pl b/crypto/sha/asm/sha512-armv4.pl
index cd3662a..4c1cbae 100644
--- a/crypto/sha/asm/sha512-armv4.pl
+++ b/crypto/sha/asm/sha512-armv4.pl
@@ -224,6 +224,7 @@
 # endif
 #endif
 
+.pushsection .text.sha512_block_data_order,"ax",%progbits
 .type	K512,%object
 .align	5
 K512:
@@ -275,6 +276,7 @@
 #else
 .skip	32
 #endif
+.popsection
 
 .global	sha512_block_data_order
 .type	sha512_block_data_order,%function
@@ -602,7 +604,7 @@
 .arch	armv7-a
 .fpu	neon
 
-.global	sha512_block_data_order_neon
+.global_with_section	sha512_block_data_order_neon, sha512_block_data_order
 .type	sha512_block_data_order_neon,%function
 .align	4
 sha512_block_data_order_neon: