bn/asm/rsaz-avx2.pl: fix digit correction bug in rsaz_1024_mul_avx2.
Credit to OSS-Fuzz for finding this.
CVE-2017-3738
(Imported from upstream's 5630661aecbea5fe3c4740f5fea744a1f07a6253 and
77d75993651b63e872244a3256e37967bb3c3e9e.)
Confirmed with Intel SDE that the test vector passes with the fix and
fails without it. (Well, we knew the latter already, since it was our
test vector.)
Change-Id: I167aa3407ddab3b434bacbd18e099c55aa40ac4c
Reviewed-on: https://boringssl-review.googlesource.com/23884
Reviewed-by: Adam Langley <agl@google.com>
(cherry picked from commit 296a61d6007688a1472798879b81517920e35dff)
Reviewed-on: https://boringssl-review.googlesource.com/23964
Reviewed-by: David Benjamin <davidben@google.com>
Commit-Queue: David Benjamin <davidben@google.com>
CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
diff --git a/crypto/fipsmodule/bn/asm/rsaz-avx2.pl b/crypto/fipsmodule/bn/asm/rsaz-avx2.pl
index 0bb50cd..32c2167 100755
--- a/crypto/fipsmodule/bn/asm/rsaz-avx2.pl
+++ b/crypto/fipsmodule/bn/asm/rsaz-avx2.pl
@@ -232,7 +232,7 @@
vmovdqu 32*8-128($ap), $ACC8
lea 192(%rsp), $tp0 # 64+128=192
- vpbroadcastq .Land_mask(%rip), $AND_MASK
+ vmovdqu .Land_mask(%rip), $AND_MASK
jmp .LOOP_GRANDE_SQR_1024
.align 32
@@ -1082,10 +1082,10 @@
vpmuludq 32*6-128($np),$Yi,$TEMP1
vpaddq $TEMP1,$ACC6,$ACC6
vpmuludq 32*7-128($np),$Yi,$TEMP2
- vpblendd \$3, $ZERO, $ACC9, $ACC9 # correct $ACC3
+ vpblendd \$3, $ZERO, $ACC9, $TEMP1 # correct $ACC3
vpaddq $TEMP2,$ACC7,$ACC7
vpmuludq 32*8-128($np),$Yi,$TEMP0
- vpaddq $ACC9, $ACC3, $ACC3 # correct $ACC3
+ vpaddq $TEMP1, $ACC3, $ACC3 # correct $ACC3
vpaddq $TEMP0,$ACC8,$ACC8
mov %rbx, %rax
@@ -1098,7 +1098,9 @@
vmovdqu -8+32*2-128($ap),$TEMP2
mov $r1, %rax
+ vpblendd \$0xfc, $ZERO, $ACC9, $ACC9 # correct $ACC3
imull $n0, %eax
+ vpaddq $ACC9,$ACC4,$ACC4 # correct $ACC3
and \$0x1fffffff, %eax
imulq 16-128($ap),%rbx
@@ -1334,15 +1336,12 @@
# But as we underutilize resources, it's possible to correct in
# each iteration with marginal performance loss. But then, as
# we do it in each iteration, we can correct less digits, and
-# avoid performance penalties completely. Also note that we
-# correct only three digits out of four. This works because
-# most significant digit is subjected to less additions.
+# avoid performance penalties completely.
$TEMP0 = $ACC9;
$TEMP3 = $Bi;
$TEMP4 = $Yi;
$code.=<<___;
- vpermq \$0, $AND_MASK, $AND_MASK
vpaddq (%rsp), $TEMP1, $ACC0
vpsrlq \$29, $ACC0, $TEMP1
@@ -1790,7 +1789,7 @@
.align 64
.Land_mask:
- .quad 0x1fffffff,0x1fffffff,0x1fffffff,-1
+ .quad 0x1fffffff,0x1fffffff,0x1fffffff,0x1fffffff
.Lscatter_permd:
.long 0,2,4,6,7,7,7,7
.Lgather_permd:
diff --git a/crypto/fipsmodule/bn/bn_tests.txt b/crypto/fipsmodule/bn/bn_tests.txt
index eb447b5..87e64e2 100644
--- a/crypto/fipsmodule/bn/bn_tests.txt
+++ b/crypto/fipsmodule/bn/bn_tests.txt
@@ -10507,6 +10507,12 @@
E = 61803d4973ae68cfb2ba6770dbed70d36760fa42c01a16d1482eacf0d01adf7a917bc86ece58a73b920295c1291b90f49167ef856ecad149330e1fd49ec71392fb62d47270b53e6d4f3c8f044b80a5736753364896932abc6d872c4c5e135d1edb200597a93ceb262ff6c99079177cd10808b9ed20c8cd7352d80ac7f6963103
M = b5d257b2c50b050d42f0852eff5cfa2571157c500cd0bd9aa0b2ccdd89c531c9609d520eb81d928fb52b06da25dc713561aa0bd365ee56db9e62ac6787a85936990f44438363560f7af9e0c16f378e5b83f658252390d849401817624da97ec613a1b855fd901847352f434a777e4e32af0cb4033c7547fb6437d067fcd3d965
+# Regression test for CVE-2017-3738.
+ModExp = d360792bd8210786607817c3dda64cc38c8d0f25569597cb1f363c7919a0c3587baff01a2283edaeb04fc288ac0ab3f279b2a89ffcb452d8bdf72422a9f9780f4aa702dc964cf033149d3a339883062cab8564aebdbfac0bf68985e522c6fe545b346044690c525ca85d3f4eb3e3c25cdf541545afc84a309e9b1d7807003461
+A = ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff2020202020df
+E = 2020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020FF2020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020
+M = ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff2020202020ff
+
# Exp tests.
#