Add benchmarks for hash functions to bssl speed.
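The tool/speed.cc side of this change is not shown in the diff below. As a rough, hypothetical illustration only of the kind of measurement such a benchmark performs (hash a fixed-size chunk in a loop and report throughput), using the public one-shot SHA256() API and std::chrono rather than the speed tool's own timing harness:

    // Hypothetical, minimal throughput benchmark for a hash function.
    // Not the bssl speed implementation; it only illustrates the measurement:
    // hash a fixed-size buffer repeatedly and report MB/s.
    #include <chrono>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    #include <openssl/sha.h>

    int main() {
      const size_t kChunkLen = 8192;   // one of the chunk sizes speed-style tools sweep
      const int kIterations = 100000;
      std::vector<uint8_t> in(kChunkLen, 0xab);
      uint8_t out[SHA256_DIGEST_LENGTH];

      auto start = std::chrono::steady_clock::now();
      for (int i = 0; i < kIterations; i++) {
        SHA256(in.data(), in.size(), out);  // one-shot SHA-256 over the chunk
      }
      double elapsed = std::chrono::duration<double>(
          std::chrono::steady_clock::now() - start).count();

      double mb = (double)kChunkLen * kIterations / (1024.0 * 1024.0);
      printf("SHA-256 (%zu byte chunks): %.1f MB/s\n", kChunkLen, mb / elapsed);
      return 0;
    }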
diff --git a/crypto/sha/asm/sha1-586.pl b/crypto/sha/asm/sha1-586.pl
index acc4f63..81252a6 100644
--- a/crypto/sha/asm/sha1-586.pl
+++ b/crypto/sha/asm/sha1-586.pl
@@ -128,6 +128,8 @@
 		`ml 2>&1` =~ /Version ([0-9]+)\./ &&
 		$1>=10);	# first version supporting AVX
 
+$shaext=$xmm;	### set to zero if compiling for 1.0.1
+
 &external_label("OPENSSL_ia32cap_P") if ($xmm);
 
 
@@ -307,7 +309,7 @@
 
 &function_begin("sha1_block_data_order");
 if ($xmm) {
-  &static_label("shaext_shortcut");
+  &static_label("shaext_shortcut")	if ($shaext);
   &static_label("ssse3_shortcut");
   &static_label("avx_shortcut")		if ($ymm);
   &static_label("K_XX_XX");
@@ -325,8 +327,10 @@
 	&mov	($C,&DWP(8,$T));
 	&test	($A,1<<24);		# check FXSR bit
 	&jz	(&label("x86"));
-	&test	($C,1<<29);		# check SHA bit
-	&jnz	(&label("shaext_shortcut"));
+	if ($shaext) {
+		&test	($C,1<<29);		# check SHA bit
+		&jnz	(&label("shaext_shortcut"));
+	}
 	if ($ymm) {
 		&and	($D,1<<28);		# mask AVX bit
 		&and	($A,1<<30);		# mask "Intel CPU" bit
@@ -405,7 +409,7 @@
 &function_end("sha1_block_data_order");
 
 if ($xmm) {
-{
+if ($shaext) {
 ######################################################################
 # Intel SHA Extensions implementation of SHA1 update function.
 #
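For reference, the `&test ($C,1<<29)` guard above reads bit 29 of OPENSSL_ia32cap_P's third word, which caches CPUID.(EAX=7,ECX=0):EBX; that bit is the SHA-extensions feature flag. A hypothetical standalone version of the same runtime check, assuming a reasonably recent GCC/Clang <cpuid.h> (not the code path the perlasm itself uses):

    // Standalone sketch of the "check SHA bit" test in the perlasm: CPUID leaf 7
    // (sub-leaf 0) reports the SHA extensions in EBX bit 29, the same bit the
    // assembly reads back out of OPENSSL_ia32cap_P's third word.
    #include <cpuid.h>
    #include <cstdio>

    static bool HasSHAExtensions() {
      unsigned eax = 0, ebx = 0, ecx = 0, edx = 0;
      if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx)) {
        return false;  // CPUID leaf 7 not available
      }
      return (ebx & (1u << 29)) != 0;  // CPUID.(EAX=7,ECX=0):EBX.SHA
    }

    int main() {
      printf("SHA extensions: %s\n", HasSHAExtensions() ? "yes" : "no");
      return 0;
    }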
diff --git a/crypto/sha/asm/sha1-x86_64.pl b/crypto/sha/asm/sha1-x86_64.pl
index ea288c1..01010cf 100644
--- a/crypto/sha/asm/sha1-x86_64.pl
+++ b/crypto/sha/asm/sha1-x86_64.pl
@@ -107,6 +107,9 @@
 	$avx = ($1>=10) + ($1>=11);
 }
 
+$shaext=0;	### set to zero if compiling for 1.0.1
+$avx=1		if (!$shaext && $avx);
+
 open OUT,"| \"$^X\" $xlate $flavour $output";
 *STDOUT=*OUT;
 
@@ -245,7 +248,8 @@
 	mov	OPENSSL_ia32cap_P+8(%rip),%r10d
 	test	\$`1<<9`,%r8d		# check SSSE3 bit
 	jz	.Lialu
-
+___
+$code.=<<___ if ($shaext);
 	test	\$`1<<29`,%r10d		# check SHA bit	
 	jnz	_shaext_shortcut
 ___
@@ -321,7 +325,7 @@
 	ret
 .size	sha1_block_data_order,.-sha1_block_data_order
 ___
-{{{
+if ($shaext) {{{
 ######################################################################
 # Intel SHA Extensions implementation of SHA1 update function.
 #
@@ -1956,9 +1960,13 @@
 	.rva	.LSEH_begin_sha1_block_data_order
 	.rva	.LSEH_end_sha1_block_data_order
 	.rva	.LSEH_info_sha1_block_data_order
+___
+$code.=<<___ if ($shaext);
 	.rva	.LSEH_begin_sha1_block_data_order_shaext
 	.rva	.LSEH_end_sha1_block_data_order_shaext
 	.rva	.LSEH_info_sha1_block_data_order_shaext
+___
+$code.=<<___;
 	.rva	.LSEH_begin_sha1_block_data_order_ssse3
 	.rva	.LSEH_end_sha1_block_data_order_ssse3
 	.rva	.LSEH_info_sha1_block_data_order_ssse3
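The hunks above keep the entry point's shortcut order intact when the SHA-extensions path is compiled out: no SSSE3 falls through to the integer-only path, otherwise SHAEXT is preferred, then AVX, then SSSE3. A rough, hypothetical C-level sketch of that selection; the extra qualifiers the real code applies before taking the AVX path (OSXSAVE, the synthetic "Intel CPU" bit) are omitted for brevity:

    // Which SHA-1 code path the dispatch above would pick on the current CPU,
    // in the same order as the assembly shortcuts. Bit positions match the
    // perlasm comments: SSSE3 = leaf-1 ECX bit 9, AVX = leaf-1 ECX bit 28,
    // SHA = leaf-7 EBX bit 29.
    #include <cpuid.h>
    #include <cstdio>

    static const char *SHA1PathForThisCPU(bool shaext_compiled_in) {
      unsigned eax = 0, ebx = 0, ecx = 0, edx = 0;
      if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
        return "ialu (scalar)";
      }
      unsigned eax7 = 0, ebx7 = 0, ecx7 = 0, edx7 = 0;
      __get_cpuid_count(7, 0, &eax7, &ebx7, &ecx7, &edx7);

      if (!(ecx & (1u << 9)))  return "ialu (scalar)";  // no SSSE3: integer-only path
      if (shaext_compiled_in && (ebx7 & (1u << 29))) return "shaext";
      if (ecx & (1u << 28))    return "avx";
      return "ssse3";
    }

    int main() {
      // With $shaext compiled out, the SHA-extensions shortcut is never taken,
      // which is exactly what the guards added in this patch arrange.
      printf("with shaext:    %s\n", SHA1PathForThisCPU(true));
      printf("without shaext: %s\n", SHA1PathForThisCPU(false));
      return 0;
    }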
diff --git a/crypto/sha/asm/sha256-586.pl b/crypto/sha/asm/sha256-586.pl
index 09648a8..ee094a9 100644
--- a/crypto/sha/asm/sha256-586.pl
+++ b/crypto/sha/asm/sha256-586.pl
@@ -82,6 +82,8 @@
 	$avx = ($1>=10) + ($1>=11);
 }
 
+$shaext=$xmm;	### set to zero if compiling for 1.0.1
+
 $unroll_after = 64*4;	# If pre-evicted from L1P cache first spin of
 			# fully unrolled loop was measured to run about
 			# 3-4x slower. If slowdown coefficient is N and
@@ -205,8 +207,8 @@
 	&jz	($unroll_after?&label("no_xmm"):&label("loop"));
 	&and	("ecx",1<<30);		# mask "Intel CPU" bit
 	&and	("ebx",1<<28|1<<9);	# mask AVX and SSSE3 bits
-	&test	("edx",1<<29)		if ($xmm);	# check for SHA
-	&jnz	(&label("shaext"))	if ($xmm);
+	&test	("edx",1<<29)		if ($shaext);	# check for SHA
+	&jnz	(&label("shaext"))	if ($shaext);
 	&or	("ecx","ebx");
 	&and	("ecx",1<<28|1<<30);
 	&cmp	("ecx",1<<28|1<<30);
@@ -505,7 +507,7 @@
 &function_end_A();
 }
 						if (!$i386 && $xmm) {{{
-{
+if ($shaext) {
 ######################################################################
 # Intel SHA Extensions implementation of SHA256 update function.
 #
diff --git a/crypto/sha/asm/sha512-x86_64.pl b/crypto/sha/asm/sha512-x86_64.pl
index e2253f1..bc5620d 100644
--- a/crypto/sha/asm/sha512-x86_64.pl
+++ b/crypto/sha/asm/sha512-x86_64.pl
@@ -123,6 +123,9 @@
 	$avx = ($1>=10) + ($1>=11);
 }
 
+$shaext=1;	### set to zero if compiling for 1.0.1
+$avx=1		if (!$shaext && $avx);
+
 open OUT,"| \"$^X\" $xlate $flavour";
 *STDOUT=*OUT;
 
@@ -259,7 +262,7 @@
 	mov	4(%r11),%r10d
 	mov	8(%r11),%r11d
 ___
-$code.=<<___ if ($SZ==4);
+$code.=<<___ if ($SZ==4 && $shaext);
 	test	\$`1<<29`,%r11d		# check for SHA
 	jnz	_shaext_shortcut
 ___
@@ -518,7 +521,7 @@
 ######################################################################
 # SIMD code paths
 #
-if ($SZ==4) {{{
+if ($SZ==4 && $shaext) {{{
 ######################################################################
 # Intel SHA Extensions implementation of SHA256 update function.
 #
@@ -2295,10 +2298,12 @@
 	.rva	.LSEH_end_$func
 	.rva	.LSEH_info_$func
 ___
-$code.=<<___ if ($SZ==4);
+$code.=<<___ if ($SZ==4 && $shext);
 	.rva	.LSEH_begin_${func}_shaext
 	.rva	.LSEH_end_${func}_shaext
 	.rva	.LSEH_info_${func}_shaext
+___
+$code.=<<___ if ($SZ==4);
 	.rva	.LSEH_begin_${func}_ssse3
 	.rva	.LSEH_end_${func}_ssse3
 	.rva	.LSEH_info_${func}_ssse3