Revert "sha: Move Armv7 dispatching to C"
google3 is unhappy with this change: it needs some more love on where
the adr target gets placed so that the pc-relative offset still fits.
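
The underlying constraint: in Arm mode, adr assembles to an add (or
sub) on pc whose immediate is an 8-bit value rotated right by an even
amount, so a pc-relative target only fits at certain offsets and
alignments; the comments deleted from sha256-armv4.pl below spell out
the same limit. A minimal sketch of the failure mode, with
illustrative labels rather than ones from this patch:

    @ Arm-mode adr is encoded as "add rd, pc, #imm" (or sub); #imm
    @ must be an 8-bit value rotated right by an even number of bits.
    adr r3, .Lnear    @ fine: a small offset is always encodable
    @ adr r3, .Lfar   @ rejected if .Lfar sits, say, 0x102 bytes away:
    @                 @ 0x102 is not a rotated 8-bit value. Moving or
    @                 @ aligning the target makes the offset encodable.
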
This reverts commit 62f43f5ea57b9b208fc784e5fa959bce89ebd718.
Change-Id: I1e335c635590fdda72a8a98314a1640d5b7ea179
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/65328
Reviewed-by: Adam Langley <agl@google.com>
Commit-Queue: Bob Beck <bbe@google.com>
Auto-Submit: Bob Beck <bbe@google.com>
diff --git a/crypto/fipsmodule/sha/asm/sha1-armv4-large.pl b/crypto/fipsmodule/sha/asm/sha1-armv4-large.pl
index 532a81b..c52b546 100644
--- a/crypto/fipsmodule/sha/asm/sha1-armv4-large.pl
+++ b/crypto/fipsmodule/sha/asm/sha1-armv4-large.pl
@@ -197,11 +197,24 @@
.code 32
#endif
-.global sha1_block_data_order_nohw
-.type sha1_block_data_order_nohw,%function
+.global sha1_block_data_order
+.type sha1_block_data_order,%function
.align 5
-sha1_block_data_order_nohw:
+sha1_block_data_order:
+#if __ARM_MAX_ARCH__>=7
+.Lsha1_block:
+ adr r3,.Lsha1_block
+ ldr r12,.LOPENSSL_armcap
+ ldr r12,[r3,r12] @ OPENSSL_armcap_P
+#ifdef __APPLE__
+ ldr r12,[r12]
+#endif
+ tst r12,#ARMV8_SHA1
+ bne .LARMv8
+ tst r12,#ARMV7_NEON
+ bne .LNEON
+#endif
stmdb sp!,{r4-r12,lr}
add $len,$inp,$len,lsl#6 @ $len to point at the end of $inp
ldmia $ctx,{$a,$b,$c,$d,$e}
@@ -291,13 +304,17 @@
moveq pc,lr @ be binary compatible with V4, yet
bx lr @ interoperable with Thumb ISA:-)
#endif
-.size sha1_block_data_order_nohw,.-sha1_block_data_order_nohw
+.size sha1_block_data_order,.-sha1_block_data_order
.align 5
.LK_00_19: .word 0x5a827999
.LK_20_39: .word 0x6ed9eba1
.LK_40_59: .word 0x8f1bbcdc
.LK_60_79: .word 0xca62c1d6
+#if __ARM_MAX_ARCH__>=7
+.LOPENSSL_armcap:
+.word OPENSSL_armcap_P-.Lsha1_block
+#endif
.asciz "SHA1 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
.align 5
___
@@ -513,10 +530,10 @@
.arch armv7-a
.fpu neon
-.global sha1_block_data_order_neon
.type sha1_block_data_order_neon,%function
.align 4
sha1_block_data_order_neon:
+.LNEON:
stmdb sp!,{r4-r12,lr}
add $len,$inp,$len,lsl#6 @ $len to point at the end of $inp
@ dmb @ errata #451034 on early Cortex A8
@@ -608,10 +625,10 @@
# define INST(a,b,c,d) .byte a,b,c,d|0x10
# endif
-.global sha1_block_data_order_hw
-.type sha1_block_data_order_hw,%function
+.type sha1_block_data_order_armv8,%function
.align 5
-sha1_block_data_order_hw:
+sha1_block_data_order_armv8:
+.LARMv8:
vstmdb sp!,{d8-d15} @ ABI specification says so
veor $E,$E,$E
@@ -676,10 +693,16 @@
vldmia sp!,{d8-d15}
ret @ bx lr
-.size sha1_block_data_order_hw,.-sha1_block_data_order_hw
+.size sha1_block_data_order_armv8,.-sha1_block_data_order_armv8
#endif
___
}}}
+$code.=<<___;
+#if __ARM_MAX_ARCH__>=7
+.comm OPENSSL_armcap_P,4,4
+.hidden OPENSSL_armcap_P
+#endif
+___
{ my %opcode = (
"sha1c" => 0xf2000c40, "sha1p" => 0xf2100c40,
diff --git a/crypto/fipsmodule/sha/asm/sha256-armv4.pl b/crypto/fipsmodule/sha/asm/sha256-armv4.pl
index 59f3417..fa82f3c 100644
--- a/crypto/fipsmodule/sha/asm/sha256-armv4.pl
+++ b/crypto/fipsmodule/sha/asm/sha256-armv4.pl
@@ -217,15 +217,34 @@
.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.size K256,.-K256
.word 0 @ terminator
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+.LOPENSSL_armcap:
+.word OPENSSL_armcap_P-.Lsha256_block_data_order
+#endif
.align 5
-.global sha256_block_data_order_nohw
-.type sha256_block_data_order_nohw,%function
-sha256_block_data_order_nohw:
+.global sha256_block_data_order
+.type sha256_block_data_order,%function
+sha256_block_data_order:
+.Lsha256_block_data_order:
+ adr r3,.Lsha256_block_data_order
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+ ldr r12,.LOPENSSL_armcap
+ ldr r12,[r3,r12] @ OPENSSL_armcap_P
+#ifdef __APPLE__
+ ldr r12,[r12]
+#endif
+ tst r12,#ARMV8_SHA256
+ bne .LARMv8
+ tst r12,#ARMV7_NEON
+ bne .LNEON
+#endif
add $len,$inp,$len,lsl#6 @ len to point at the end of inp
stmdb sp!,{$ctx,$inp,$len,r4-r11,lr}
ldmia $ctx,{$A,$B,$C,$D,$E,$F,$G,$H}
- adr $Ktbl,K256
+ @ TODO(davidben): When the OPENSSL_armcap logic above is removed,
+ @ replace this with a simple ADR.
+ sub $Ktbl,r3,#256+32 @ K256
sub sp,sp,#16*4 @ alloca(X[16])
.Loop:
# if __ARM_ARCH>=7
@@ -279,7 +298,7 @@
moveq pc,lr @ be binary compatible with V4, yet
bx lr @ interoperable with Thumb ISA:-)
#endif
-.size sha256_block_data_order_nohw,.-sha256_block_data_order_nohw
+.size sha256_block_data_order,.-sha256_block_data_order
___
######################################################################
# NEON stuff
@@ -464,12 +483,10 @@
.align 5
.skip 16
sha256_block_data_order_neon:
+.LNEON:
stmdb sp!,{r4-r12,lr}
sub $H,sp,#16*4+16
- @ In Arm mode, the following ADR runs up against the limits of encodable
- @ offsets. It only fits because the offset, when the ADR is placed here,
- @ is a multiple of 16.
adr $Ktbl,K256
bic $H,$H,#15 @ align for 128-bit stores
mov $t2,sp
@@ -596,26 +613,12 @@
# define INST(a,b,c,d) .byte a,b,c,d
# endif
-.LK256_shortcut:
-@ PC is 8 bytes ahead in Arm mode and 4 bytes ahead in Thumb mode.
-#if defined(__thumb2__)
-.word K256-(.LK256_add+4)
-#else
-.word K256-(.LK256_add+8)
-#endif
-
-.global sha256_block_data_order_hw
-.type sha256_block_data_order_hw,%function
+.type sha256_block_data_order_armv8,%function
.align 5
-sha256_block_data_order_hw:
- @ K256 is too far to reference from one ADR command in Thumb mode. In
- @ Arm mode, we could make it fit by aligning the ADR offset to a 64-byte
- @ boundary. For simplicity, just load the offset from .LK256_shortcut.
- ldr $Ktbl,.LK256_shortcut
-.LK256_add:
- add $Ktbl,pc,$Ktbl
-
+sha256_block_data_order_armv8:
+.LARMv8:
vld1.32 {$ABCD,$EFGH},[$ctx]
+ sub $Ktbl,$Ktbl,#256+32
add $len,$inp,$len,lsl#6 @ len to point at the end of inp
b .Loop_v8
@@ -677,13 +680,17 @@
vst1.32 {$ABCD,$EFGH},[$ctx]
ret @ bx lr
-.size sha256_block_data_order_hw,.-sha256_block_data_order_hw
+.size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
#endif
___
}}}
$code.=<<___;
.asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
.align 2
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+.comm OPENSSL_armcap_P,4,4
+.hidden OPENSSL_armcap_P
+#endif
___
open SELF,$0;
diff --git a/crypto/fipsmodule/sha/asm/sha512-armv4.pl b/crypto/fipsmodule/sha/asm/sha512-armv4.pl
index f2d1d22..f52b5b0 100644
--- a/crypto/fipsmodule/sha/asm/sha512-armv4.pl
+++ b/crypto/fipsmodule/sha/asm/sha512-armv4.pl
@@ -276,13 +276,33 @@
WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a)
WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
.size K512,.-K512
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+.LOPENSSL_armcap:
+.word OPENSSL_armcap_P-.Lsha512_block_data_order
+.skip 32-4
+#else
+.skip 32
+#endif
-.global sha512_block_data_order_nohw
-.type sha512_block_data_order_nohw,%function
-sha512_block_data_order_nohw:
+.global sha512_block_data_order
+.type sha512_block_data_order,%function
+sha512_block_data_order:
+.Lsha512_block_data_order:
+ adr r3,.Lsha512_block_data_order
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+ ldr r12,.LOPENSSL_armcap
+ ldr r12,[r3,r12] @ OPENSSL_armcap_P
+#ifdef __APPLE__
+ ldr r12,[r12]
+#endif
+ tst r12,#ARMV7_NEON
+ bne .LNEON
+#endif
add $len,$inp,$len,lsl#7 @ len to point at the end of inp
stmdb sp!,{r4-r12,lr}
- adr $Ktbl,K512
+ @ TODO(davidben): When the OPENSSL_armcap logic above is removed,
+ @ replace this with a simple ADR.
+ sub $Ktbl,r3,#672 @ K512
sub sp,sp,#9*8
ldr $Elo,[$ctx,#$Eoff+$lo]
@@ -481,7 +501,7 @@
moveq pc,lr @ be binary compatible with V4, yet
bx lr @ interoperable with Thumb ISA:-)
#endif
-.size sha512_block_data_order_nohw,.-sha512_block_data_order_nohw
+.size sha512_block_data_order,.-sha512_block_data_order
___
{
@@ -592,6 +612,7 @@
.type sha512_block_data_order_neon,%function
.align 4
sha512_block_data_order_neon:
+.LNEON:
dmb @ errata #451034 on early Cortex A8
add $len,$inp,$len,lsl#7 @ len to point at the end of inp
adr $Ktbl,K512
@@ -629,6 +650,10 @@
$code.=<<___;
.asciz "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
.align 2
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+.comm OPENSSL_armcap_P,4,4
+.hidden OPENSSL_armcap_P
+#endif
___
$code =~ s/\`([^\`]*)\`/eval $1/gem;
diff --git a/crypto/fipsmodule/sha/internal.h b/crypto/fipsmodule/sha/internal.h
index 0c05d73..7c15b2c 100644
--- a/crypto/fipsmodule/sha/internal.h
+++ b/crypto/fipsmodule/sha/internal.h
@@ -26,7 +26,7 @@
// Define SHA{n}[_{variant}]_ASM if sha{n}_block_data_order[_{variant}] is
// defined in assembly.
-#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86)
+#if !defined(OPENSSL_NO_ASM) && (defined(OPENSSL_X86) || defined(OPENSSL_ARM))
#define SHA1_ASM
#define SHA256_ASM
@@ -39,35 +39,6 @@
void sha512_block_data_order(uint64_t *state, const uint8_t *data,
size_t num_blocks);
-#elif !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM)
-
-#define SHA1_ASM_NOHW
-#define SHA256_ASM_NOHW
-#define SHA512_ASM_NOHW
-
-#define SHA1_ASM_HW
-OPENSSL_INLINE int sha1_hw_capable(void) {
- return CRYPTO_is_ARMv8_SHA1_capable();
-}
-
-#define SHA1_ASM_NEON
-void sha1_block_data_order_neon(uint32_t *state, const uint8_t *data,
- size_t num);
-
-#define SHA256_ASM_HW
-OPENSSL_INLINE int sha256_hw_capable(void) {
- return CRYPTO_is_ARMv8_SHA256_capable();
-}
-
-#define SHA256_ASM_NEON
-void sha256_block_data_order_neon(uint32_t *state, const uint8_t *data,
- size_t num);
-
-// Armv8.2 SHA-512 instructions are not available in 32-bit.
-#define SHA512_ASM_NEON
-void sha512_block_data_order_neon(uint64_t *state, const uint8_t *data,
- size_t num);
-
#elif !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64)
#define SHA1_ASM_NOHW
@@ -178,7 +149,6 @@
void sha512_block_data_order_hw(uint64_t *state, const uint8_t *data,
size_t num);
#endif
-
#if defined(SHA512_ASM_NOHW)
void sha512_block_data_order_nohw(uint64_t *state, const uint8_t *data,
size_t num);
diff --git a/crypto/fipsmodule/sha/sha1.c b/crypto/fipsmodule/sha/sha1.c
index 7a97266..7b267e3 100644
--- a/crypto/fipsmodule/sha/sha1.c
+++ b/crypto/fipsmodule/sha/sha1.c
@@ -410,12 +410,6 @@
return;
}
#endif
-#if defined(SHA1_ASM_NEON)
- if (CRYPTO_is_NEON_capable()) {
- sha1_block_data_order_neon(state, data, num);
- return;
- }
-#endif
sha1_block_data_order_nohw(state, data, num);
}
diff --git a/crypto/fipsmodule/sha/sha256.c b/crypto/fipsmodule/sha/sha256.c
index 8cedc5f..0b0aca2 100644
--- a/crypto/fipsmodule/sha/sha256.c
+++ b/crypto/fipsmodule/sha/sha256.c
@@ -332,12 +332,6 @@
return;
}
#endif
-#if defined(SHA256_ASM_NEON)
- if (CRYPTO_is_NEON_capable()) {
- sha256_block_data_order_neon(state, data, num);
- return;
- }
-#endif
sha256_block_data_order_nohw(state, data, num);
}
diff --git a/crypto/fipsmodule/sha/sha512.c b/crypto/fipsmodule/sha/sha512.c
index d31ab71..0f4142c 100644
--- a/crypto/fipsmodule/sha/sha512.c
+++ b/crypto/fipsmodule/sha/sha512.c
@@ -516,12 +516,6 @@
return;
}
#endif
-#if defined(SHA512_ASM_NEON)
- if (CRYPTO_is_NEON_capable()) {
- sha512_block_data_order_neon(state, data, num);
- return;
- }
-#endif
sha512_block_data_order_nohw(state, data, num);
}
diff --git a/crypto/fipsmodule/sha/sha_test.cc b/crypto/fipsmodule/sha/sha_test.cc
index 671c170..22856f8 100644
--- a/crypto/fipsmodule/sha/sha_test.cc
+++ b/crypto/fipsmodule/sha/sha_test.cc
@@ -75,11 +75,6 @@
return;
}
#endif
-#if defined(SHA1_ASM_NEON)
- if (CRYPTO_is_NEON_capable()) {
- CHECK_ABI(sha1_block_data_order_neon, ctx.h, kBuf, blocks);
- }
-#endif
#if defined(SHA1_ASM_NOHW)
CHECK_ABI(sha1_block_data_order_nohw, ctx.h, kBuf, blocks);
#endif
@@ -112,11 +107,6 @@
return;
}
#endif
-#if defined(SHA256_ASM_NEON)
- if (CRYPTO_is_NEON_capable()) {
- CHECK_ABI(sha256_block_data_order_neon, ctx.h, kBuf, blocks);
- }
-#endif
#if defined(SHA256_ASM_NOHW)
CHECK_ABI(sha256_block_data_order_nohw, ctx.h, kBuf, blocks);
#endif
@@ -142,11 +132,6 @@
CHECK_ABI(sha512_block_data_order_avx, ctx.h, kBuf, blocks);
}
#endif
-#if defined(SHA512_ASM_NEON)
- if (CRYPTO_is_NEON_capable()) {
- CHECK_ABI(sha512_block_data_order_neon, ctx.h, kBuf, blocks);
- }
-#endif
#if defined(SHA512_ASM_NOHW)
CHECK_ABI(sha512_block_data_order_nohw, ctx.h, kBuf, blocks);
#endif