Change |CRYPTO_chacha_20| to use 96-bit nonces, 32-bit counters.
The new function |CRYPTO_chacha_96_bit_nonce_from_64_bit_nonce| can be
used to adapt code that uses 64-bit nonces, in a way that is
compatible with the old semantics.
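
The adapter itself is not part of this diff; a minimal sketch of its
likely shape, assuming it simply zero-prefixes the 64-bit nonce (the
same adaptation e_chacha20poly1305.c performs below), would be:

    #include <stdint.h>
    #include <string.h>

    /* Sketch only: under the old layout, state words 12-13 held a
     * 64-bit counter and words 14-15 the 64-bit nonce. For counters
     * below 2^32 the high counter word was zero, so prepending four
     * zero bytes to the old nonce yields an identical initial state
     * under the new 32-bit-counter/96-bit-nonce layout. */
    void CRYPTO_chacha_96_bit_nonce_from_64_bit_nonce(uint8_t out[12],
                                                      const uint8_t in[8]) {
      memset(out, 0, 4);
      memcpy(out + 4, in, 8);
    }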
Change-Id: I83d5b2d482e006e82982f58c9f981e8078c3e1b0
Reviewed-on: https://boringssl-review.googlesource.com/6100
Reviewed-by: Adam Langley <alangley@gmail.com>
diff --git a/crypto/chacha/chacha_generic.c b/crypto/chacha/chacha_generic.c
index 31cf4f0..f262033 100644
--- a/crypto/chacha/chacha_generic.c
+++ b/crypto/chacha/chacha_generic.c
@@ -54,8 +54,8 @@
#if defined(OPENSSL_ARM) && !defined(OPENSSL_NO_ASM)
/* Defined in chacha_vec.c */
void CRYPTO_chacha_20_neon(uint8_t *out, const uint8_t *in, size_t in_len,
- const uint8_t key[32], const uint8_t nonce[8],
- size_t counter);
+ const uint8_t key[32], const uint8_t nonce[12],
+ uint32_t counter);
#endif
/* chacha_core performs 20 rounds of ChaCha on the input words in
@@ -85,8 +85,8 @@
}
void CRYPTO_chacha_20(uint8_t *out, const uint8_t *in, size_t in_len,
- const uint8_t key[32], const uint8_t nonce[8],
- size_t counter) {
+ const uint8_t key[32], const uint8_t nonce[12],
+ uint32_t counter) {
uint32_t input[16];
uint8_t buf[64];
size_t todo, i;
@@ -114,9 +114,9 @@
input[11] = U8TO32_LITTLE(key + 28);
input[12] = counter;
- input[13] = ((uint64_t)counter) >> 32;
- input[14] = U8TO32_LITTLE(nonce + 0);
- input[15] = U8TO32_LITTLE(nonce + 4);
+ input[13] = U8TO32_LITTLE(nonce + 0);
+ input[14] = U8TO32_LITTLE(nonce + 4);
+ input[15] = U8TO32_LITTLE(nonce + 8);
while (in_len > 0) {
todo = sizeof(buf);
@@ -134,9 +134,6 @@
in_len -= todo;
input[12]++;
- if (input[12] == 0) {
- input[13]++;
- }
}
}
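
The generic-code change above amounts to moving from the original
layout (64-bit counter in words 12-13, 64-bit nonce in words 14-15) to
the RFC 7539 layout (32-bit counter in word 12, 96-bit nonce in words
13-15). A standalone sketch of the new initial-state setup — not the
library's actual function; chacha_init_state and this U8TO32_LITTLE
definition are reconstructions for illustration:

    #include <stdint.h>
    #include <string.h>

    /* Little-endian 32-bit load, matching chacha_generic.c's macro. */
    #define U8TO32_LITTLE(p)                            \
      (((uint32_t)(p)[0]) | ((uint32_t)(p)[1] << 8) |   \
       ((uint32_t)(p)[2] << 16) | ((uint32_t)(p)[3] << 24))

    /* New initial state (RFC 7539 layout):
     *   words  0-3  constant "expand 32-byte k"
     *   words  4-11 256-bit key, as little-endian words
     *   word   12   32-bit block counter
     *   words 13-15 96-bit nonce, as little-endian words */
    static void chacha_init_state(uint32_t input[16], const uint8_t key[32],
                                  const uint8_t nonce[12], uint32_t counter) {
      static const uint32_t sigma[4] = {0x61707865, 0x3320646E,
                                        0x79622D32, 0x6B206574};
      size_t i;
      memcpy(input, sigma, sizeof(sigma));
      for (i = 0; i < 8; i++) {
        input[4 + i] = U8TO32_LITTLE(key + 4 * i);
      }
      input[12] = counter;
      input[13] = U8TO32_LITTLE(nonce + 0);
      input[14] = U8TO32_LITTLE(nonce + 4);
      input[15] = U8TO32_LITTLE(nonce + 8);
    }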
diff --git a/crypto/chacha/chacha_vec.c b/crypto/chacha/chacha_vec.c
index 14b54a7..addbaa3 100644
--- a/crypto/chacha/chacha_vec.c
+++ b/crypto/chacha/chacha_vec.c
@@ -154,12 +154,12 @@
const uint8_t *in,
size_t inlen,
const uint8_t key[32],
- const uint8_t nonce[8],
- size_t counter)
+ const uint8_t nonce[12],
+ uint32_t counter)
{
unsigned iters, i, *op=(unsigned *)out, *ip=(unsigned *)in, *kp;
#if defined(__ARM_NEON__)
- uint32_t np[2];
+ uint32_t np[3];
uint8_t alignment_buffer[16] __attribute__((aligned(16)));
#endif
vec s0, s1, s2, s3;
@@ -167,20 +167,16 @@
{0x61707865,0x3320646E,0x79622D32,0x6B206574};
kp = (unsigned *)key;
#if defined(__ARM_NEON__)
- memcpy(np, nonce, 8);
+ memcpy(np, nonce, 12);
#endif
s0 = LOAD_ALIGNED(chacha_const);
s1 = LOAD(&((vec*)kp)[0]);
s2 = LOAD(&((vec*)kp)[1]);
s3 = (vec){
- counter & 0xffffffff,
-#if __ARM_NEON__ || defined(OPENSSL_X86)
- 0, /* can't right-shift 32 bits on a 32-bit system. */
-#else
- counter >> 32,
-#endif
+ counter,
((uint32_t*)nonce)[0],
- ((uint32_t*)nonce)[1]
+ ((uint32_t*)nonce)[1],
+ ((uint32_t*)nonce)[2]
};
for (iters = 0; iters < inlen/(BPI*64); iters++)
@@ -212,8 +208,8 @@
x2 = chacha_const[2]; x3 = chacha_const[3];
x4 = kp[0]; x5 = kp[1]; x6 = kp[2]; x7 = kp[3];
x8 = kp[4]; x9 = kp[5]; x10 = kp[6]; x11 = kp[7];
- x12 = counter+BPI*iters+(BPI-1); x13 = 0;
- x14 = np[0]; x15 = np[1];
+ x12 = counter+BPI*iters+(BPI-1); x13 = np[0];
+ x14 = np[1]; x15 = np[2];
#endif
for (i = CHACHA_RNDS/2; i; i--)
{
@@ -265,9 +261,9 @@
op[10] = REVW_BE(REVW_BE(ip[10]) ^ (x10 + kp[6]));
op[11] = REVW_BE(REVW_BE(ip[11]) ^ (x11 + kp[7]));
op[12] = REVW_BE(REVW_BE(ip[12]) ^ (x12 + counter+BPI*iters+(BPI-1)));
- op[13] = REVW_BE(REVW_BE(ip[13]) ^ (x13));
- op[14] = REVW_BE(REVW_BE(ip[14]) ^ (x14 + np[0]));
- op[15] = REVW_BE(REVW_BE(ip[15]) ^ (x15 + np[1]));
+ op[13] = REVW_BE(REVW_BE(ip[13]) ^ (x13 + np[0]));
+ op[14] = REVW_BE(REVW_BE(ip[14]) ^ (x14 + np[1]));
+ op[15] = REVW_BE(REVW_BE(ip[15]) ^ (x15 + np[2]));
s3 += ONE;
ip += 16;
op += 16;
diff --git a/crypto/chacha/chacha_vec_arm.S b/crypto/chacha/chacha_vec_arm.S
index dfd596c..f18c867 100644
--- a/crypto/chacha/chacha_vec_arm.S
+++ b/crypto/chacha/chacha_vec_arm.S
@@ -60,137 +60,138 @@
.thumb_func
.type CRYPTO_chacha_20_neon, %function
CRYPTO_chacha_20_neon:
- @ args = 8, pretend = 0, frame = 152
+ @ args = 8, pretend = 0, frame = 160
@ frame_needed = 1, uses_anonymous_args = 0
push {r4, r5, r6, r7, r8, r9, r10, fp, lr}
- mov r8, r3
+ mov r9, r3
vpush.64 {d8, d9, d10, d11, d12, d13, d14, d15}
- mov r9, r2
+ mov r10, r2
ldr r4, .L91+16
- mov fp, r0
- mov r10, r1
- mov lr, r8
+ mov fp, r1
+ mov r8, r9
.LPIC16:
add r4, pc
- sub sp, sp, #156
+ sub sp, sp, #164
add r7, sp, #0
sub sp, sp, #112
- add r6, r7, #144
- str r0, [r7, #88]
+ add lr, r7, #148
+ str r0, [r7, #80]
str r1, [r7, #12]
str r2, [r7, #8]
ldmia r4, {r0, r1, r2, r3}
add r4, sp, #15
bic r4, r4, #15
- ldr ip, [r7, #256]
- str r4, [r7, #84]
+ ldr r6, [r7, #264]
+ str r4, [r7, #88]
mov r5, r4
adds r4, r4, #64
- adds r5, r5, #80
- str r8, [r7, #68]
+ add ip, r5, #80
+ str r9, [r7, #56]
stmia r4, {r0, r1, r2, r3}
movw r4, #43691
- ldr r0, [ip] @ unaligned
+ ldr r0, [r6] @ unaligned
movt r4, 43690
- ldr r1, [ip, #4] @ unaligned
- ldr r3, [r7, #84]
- ldr r2, [r8, #8] @ unaligned
- mov r8, #0
- stmia r6!, {r0, r1}
- mov r6, r5
- ldr r1, [lr, #4] @ unaligned
- ldr r0, [lr] @ unaligned
- vldr d24, [r3, #64]
- vldr d25, [r3, #72]
- ldr r3, [lr, #12] @ unaligned
- str r5, [r7, #80]
- stmia r5!, {r0, r1, r2, r3}
- ldr r0, [lr, #16]! @ unaligned
- ldr r2, [r7, #84]
- umull r4, r5, r9, r4
+ ldr r1, [r6, #4] @ unaligned
+ ldr r2, [r6, #8] @ unaligned
+ ldr r3, [r9, #12] @ unaligned
+ str ip, [r7, #84]
+ stmia lr!, {r0, r1, r2}
+ mov lr, ip
+ ldr r1, [r9, #4] @ unaligned
+ ldr r2, [r9, #8] @ unaligned
+ ldr r0, [r9] @ unaligned
+ vldr d24, [r5, #64]
+ vldr d25, [r5, #72]
+ umull r4, r5, r10, r4
+ stmia ip!, {r0, r1, r2, r3}
+ ldr r0, [r8, #16]! @ unaligned
+ ldr r2, [r7, #88]
+ ldr r4, [r7, #268]
+ ldr r1, [r8, #4] @ unaligned
vldr d26, [r2, #80]
vldr d27, [r2, #88]
- ldr r1, [lr, #4] @ unaligned
- ldr r2, [lr, #8] @ unaligned
- ldr r3, [lr, #12] @ unaligned
- ldr r4, [r7, #260]
- stmia r6!, {r0, r1, r2, r3}
- ldr r3, [ip]
- ldr r1, [r7, #84]
- ldr r2, [ip, #4]
- str r3, [r7, #64]
- vldr d28, [r1, #80]
- vldr d29, [r1, #88]
- str r3, [r7, #136]
+ ldr r3, [r8, #12] @ unaligned
+ ldr r2, [r8, #8] @ unaligned
+ stmia lr!, {r0, r1, r2, r3}
+ ldr r3, [r6]
+ ldr r1, [r6, #4]
+ ldr r6, [r6, #8]
+ str r3, [r7, #68]
+ str r3, [r7, #132]
lsrs r3, r5, #7
+ str r6, [r7, #140]
+ str r6, [r7, #60]
+ ldr r6, [r7, #88]
str r4, [r7, #128]
- str r2, [r7, #140]
- str r8, [r7, #132]
- str r2, [r7, #60]
+ str r1, [r7, #136]
+ str r1, [r7, #64]
+ vldr d28, [r6, #80]
+ vldr d29, [r6, #88]
vldr d22, [r7, #128]
vldr d23, [r7, #136]
beq .L26
+ mov r5, r6
lsls r2, r3, #8
- ldr r5, [r1, #64]
sub r3, r2, r3, lsl #6
- ldr r2, [r1, #68]
+ ldr r2, [r5, #68]
+ ldr r6, [r6, #64]
vldr d0, .L91
vldr d1, .L91+8
- adds r4, r4, #2
- str r5, [r7, #56]
- str r2, [r7, #52]
- ldr r5, [r1, #72]
- ldr r2, [r1, #76]
+ str r2, [r7, #48]
+ ldr r2, [r5, #72]
str r3, [r7, #4]
- str r5, [r7, #48]
+ str r6, [r7, #52]
str r2, [r7, #44]
- mov r2, fp
- str r4, [r7, #72]
+ adds r2, r4, #2
+ str r2, [r7, #72]
+ ldr r2, [r5, #76]
+ str fp, [r7, #76]
+ str r2, [r7, #40]
+ ldr r2, [r7, #80]
adds r3, r2, r3
- str r10, [r7, #76]
str r3, [r7, #16]
.L4:
- ldr r5, [r7, #68]
- add r8, r7, #44
- ldr r4, [r7, #72]
+ ldr r5, [r7, #56]
+ add r8, r7, #40
+ ldr r4, [r7, #68]
vadd.i32 q3, q11, q0
ldmia r8, {r8, r9, r10, fp}
- vmov q8, q14 @ v4si
+ mov r1, r5
ldr r2, [r5, #4]
- vmov q1, q13 @ v4si
+ vmov q8, q14 @ v4si
ldr r3, [r5]
+ vmov q1, q13 @ v4si
+ ldr r6, [r1, #28]
vmov q9, q12 @ v4si
- ldr lr, [r5, #20]
- vmov q2, q11 @ v4si
mov r0, r2
ldr r2, [r5, #8]
- str r3, [r7, #108]
- mov r3, r5
- ldr ip, [r5, #16]
- vmov q15, q14 @ v4si
- mov r1, r2
- ldr r2, [r5, #12]
- ldr r5, [r5, #24]
- vmov q5, q13 @ v4si
- ldr r6, [r3, #28]
- vmov q10, q12 @ v4si
- ldr r3, [r7, #64]
- str r5, [r7, #116]
- movs r5, #10
- str r6, [r7, #120]
str r4, [r7, #112]
+ movs r1, #10
+ ldr r4, [r7, #72]
+ vmov q2, q11 @ v4si
+ ldr lr, [r5, #20]
+ vmov q15, q14 @ v4si
+ str r3, [r7, #108]
+ vmov q5, q13 @ v4si
+ str r2, [r7, #116]
+ vmov q10, q12 @ v4si
+ ldr r2, [r5, #12]
+ ldr ip, [r5, #16]
+ ldr r3, [r7, #64]
+ ldr r5, [r5, #24]
+ str r6, [r7, #120]
+ str r1, [r7, #92]
ldr r6, [r7, #60]
+ str r4, [r7, #100]
+ ldr r1, [r7, #116]
+ ldr r4, [r7, #108]
str r8, [r7, #96]
mov r8, r10
- ldr r4, [r7, #108]
- mov r10, r9
- ldr r9, [r7, #116]
str lr, [r7, #104]
+ mov r10, r9
mov lr, r3
- str r5, [r7, #92]
- movs r5, #0
+ mov r9, r5
str r6, [r7, #124]
- str r5, [r7, #100]
b .L92
.L93:
.align 3
@@ -213,25 +214,24 @@
str r5, [r7, #116]
add r10, r10, r1
vrev32.16 q3, q3
- eor lr, lr, r10
+ str r6, [r7, #108]
vadd.i32 q8, q8, q3
vrev32.16 q2, q2
vadd.i32 q15, q15, q2
mov fp, r3
- ldr r3, [r7, #112]
+ ldr r3, [r7, #100]
veor q4, q8, q1
- str r6, [r7, #112]
veor q6, q15, q5
+ add fp, fp, r2
eors r3, r3, r5
mov r5, r6
- ldr r6, [r7, #100]
+ ldr r6, [r7, #112]
vshl.i32 q1, q4, #12
vshl.i32 q5, q6, #12
- add fp, fp, r2
- eors r6, r6, r5
ror r3, r3, #16
+ eors r6, r6, r5
+ eor lr, lr, r10
vsri.32 q1, q4, #20
- ror lr, lr, #16
mov r5, r6
ldr r6, [r7, #124]
vsri.32 q5, q6, #20
@@ -239,25 +239,26 @@
eor r6, r6, fp
ror r5, r5, #16
vadd.i32 q9, q9, q1
- add r9, r9, lr
+ ror lr, lr, #16
ror r3, r6, #16
ldr r6, [r7, #124]
vadd.i32 q10, q10, q5
- str r3, [r7, #108]
+ add r9, r9, lr
veor q4, q9, q3
add ip, ip, r6
ldr r6, [r7, #104]
veor q6, q10, q2
eor r4, ip, r4
- eor r1, r9, r1
+ str r3, [r7, #104]
vshl.i32 q3, q4, #8
+ eor r1, r9, r1
mov r8, r6
ldr r6, [r7, #120]
vshl.i32 q2, q6, #8
ror r4, r4, #20
add r6, r6, r3
vsri.32 q3, q4, #24
- str r6, [r7, #104]
+ str r6, [r7, #100]
eors r2, r2, r6
ldr r6, [r7, #116]
vsri.32 q2, q6, #24
@@ -268,7 +269,7 @@
eor r0, r8, r0
vadd.i32 q15, q15, q2
mov r3, r6
- ldr r6, [r7, #112]
+ ldr r6, [r7, #108]
veor q6, q4, q1
ror r0, r0, #20
str r3, [r7, #112]
@@ -285,7 +286,7 @@
ror r1, r1, #20
eors r5, r5, r6
vsri.32 q8, q6, #25
- ldr r6, [r7, #108]
+ ldr r6, [r7, #104]
ror r3, r3, #24
ror r5, r5, #24
vsri.32 q1, q5, #25
@@ -297,7 +298,7 @@
vext.32 q8, q8, q8, #1
str ip, [r7, #124]
add ip, r5, r8
- ldr r5, [r7, #104]
+ ldr r5, [r7, #100]
eor lr, r10, lr
ror r6, r6, #24
vext.32 q1, q1, q1, #1
@@ -410,7 +411,7 @@
veor q6, q15, q1
ldr r3, [r7, #116]
vshl.i32 q1, q4, #7
- str r2, [r7, #112]
+ str r2, [r7, #100]
add r3, r3, r2
str r3, [r7, #120]
vshl.i32 q5, q6, #7
@@ -423,7 +424,7 @@
vsri.32 q5, q6, #25
ldr r3, [r7, #92]
ror r4, r4, #25
- str r6, [r7, #100]
+ str r6, [r7, #112]
ror r0, r0, #25
subs r3, r3, #1
str r5, [r7, #104]
@@ -437,308 +438,325 @@
vext.32 q5, q5, q5, #3
vext.32 q1, q1, q1, #3
bne .L3
- ldr r3, [r7, #80]
+ ldr r3, [r7, #84]
vadd.i32 q4, q12, q10
- str r9, [r7, #116]
+ str r9, [r7, #92]
mov r9, r10
mov r10, r8
ldr r8, [r7, #96]
str lr, [r7, #96]
mov lr, r5
- ldr r5, [r7, #56]
+ ldr r5, [r7, #52]
vadd.i32 q5, q13, q5
ldr r6, [r7, #76]
vadd.i32 q15, q14, q15
add fp, fp, r5
- ldr r5, [r7, #52]
- str r4, [r7, #108]
+ ldr r5, [r7, #48]
+ str r3, [r7, #104]
vadd.i32 q7, q14, q8
- ldr r4, [r7, #112]
- add r5, r10, r5
- str r3, [r7, #112]
- vadd.i32 q2, q11, q2
ldr r3, [r6, #12] @ unaligned
- vadd.i32 q6, q12, q9
- str r0, [r7, #92]
- vadd.i32 q1, q13, q1
+ add r10, r10, r5
+ str r0, [r7, #36]
+ vadd.i32 q2, q11, q2
ldr r0, [r6] @ unaligned
+ vadd.i32 q6, q12, q9
+ ldr r5, [r7, #104]
+ vadd.i32 q1, q13, q1
+ str r1, [r7, #116]
vadd.i32 q11, q11, q0
- str r1, [r7, #40]
- str r2, [r7, #36]
- vadd.i32 q3, q11, q3
ldr r1, [r6, #4] @ unaligned
- vadd.i32 q11, q11, q0
+ str r2, [r7, #32]
+ vadd.i32 q3, q11, q3
ldr r2, [r6, #8] @ unaligned
- str r5, [r7, #104]
vadd.i32 q11, q11, q0
- ldr r5, [r7, #112]
- ldr r10, [r7, #80]
+ str r4, [r7, #108]
+ ldr r4, [r7, #100]
+ vadd.i32 q11, q11, q0
stmia r5!, {r0, r1, r2, r3}
- mov r5, r10
- ldr r0, [r7, #84]
- ldr r2, [r7, #48]
- ldr r3, [r7, #72]
- vldr d20, [r0, #80]
- vldr d21, [r0, #88]
- add r9, r9, r2
+ ldr r2, [r7, #88]
+ ldr r3, [r7, #44]
+ ldr r5, [r7, #84]
+ vldr d20, [r2, #80]
+ vldr d21, [r2, #88]
+ add r3, r9, r3
+ str r3, [r7, #104]
veor q10, q10, q4
- ldr r2, [r7, #44]
+ ldr r3, [r7, #40]
+ add r3, r8, r3
+ str r3, [r7, #100]
+ ldr r3, [r7, #72]
+ vstr d20, [r2, #80]
+ vstr d21, [r2, #88]
adds r1, r4, r3
str r1, [r7, #28]
- add r2, r8, r2
- str r2, [r7, #32]
- vstr d20, [r0, #80]
- vstr d21, [r0, #88]
ldmia r5!, {r0, r1, r2, r3}
+ ldr r4, [r7, #68]
+ ldr r5, [r7, #112]
+ ldr r8, [r7, #84]
+ add r5, r5, r4
ldr r4, [r7, #96]
+ str r5, [r7, #24]
ldr r5, [r7, #64]
add r4, r4, r5
- ldr r5, [r7, #124]
+ ldr r5, [r7, #60]
str r4, [r7, #96]
- ldr r4, [r7, #60]
- add r5, r5, r4
- ldr r4, [r7, #88]
- str r5, [r7, #24]
- mov r5, r10
+ ldr r4, [r7, #124]
+ add r4, r4, r5
+ str r4, [r7, #20]
+ ldr r4, [r7, #80]
+ mov r5, r8
str r0, [r4] @ unaligned
mov r0, r4
str r1, [r4, #4] @ unaligned
- mov r8, r0
+ mov r4, r8
str r2, [r0, #8] @ unaligned
- mov r4, r10
+ mov r8, r0
str r3, [r0, #12] @ unaligned
+ mov r9, r4
ldr r0, [r6, #16]! @ unaligned
+ ldr r3, [r6, #12] @ unaligned
ldr r1, [r6, #4] @ unaligned
ldr r2, [r6, #8] @ unaligned
- ldr r3, [r6, #12] @ unaligned
ldr r6, [r7, #76]
stmia r5!, {r0, r1, r2, r3}
- mov r5, r10
- ldr r3, [r7, #84]
+ mov r5, r8
+ ldr r3, [r7, #88]
vldr d20, [r3, #80]
vldr d21, [r3, #88]
veor q10, q10, q5
vstr d20, [r3, #80]
vstr d21, [r3, #88]
ldmia r4!, {r0, r1, r2, r3}
- mov r4, r8
+ mov r4, r9
str r0, [r8, #16] @ unaligned
str r1, [r8, #20] @ unaligned
str r2, [r8, #24] @ unaligned
str r3, [r8, #28] @ unaligned
- mov r8, r4
+ mov r8, r5
ldr r0, [r6, #32]! @ unaligned
- str r10, [r7, #124]
+ mov r5, r9
ldr r1, [r6, #4] @ unaligned
ldr r2, [r6, #8] @ unaligned
ldr r3, [r6, #12] @ unaligned
ldr r6, [r7, #76]
stmia r5!, {r0, r1, r2, r3}
- mov r5, r10
- ldr r2, [r7, #84]
- vldr d16, [r2, #80]
- vldr d17, [r2, #88]
+ mov r5, r8
+ ldr r1, [r7, #88]
+ vldr d16, [r1, #80]
+ vldr d17, [r1, #88]
veor q15, q8, q15
- vstr d30, [r2, #80]
- vstr d31, [r2, #88]
- ldmia r10!, {r0, r1, r2, r3}
- str r0, [r4, #32] @ unaligned
- str r1, [r4, #36] @ unaligned
- str r2, [r4, #40] @ unaligned
- str r3, [r4, #44] @ unaligned
+ vstr d30, [r1, #80]
+ vstr d31, [r1, #88]
+ ldmia r4!, {r0, r1, r2, r3}
+ mov r4, r9
+ str r0, [r8, #32] @ unaligned
+ str r1, [r8, #36] @ unaligned
+ str r2, [r8, #40] @ unaligned
+ str r3, [r8, #44] @ unaligned
+ mov r8, r5
ldr r0, [r6, #48]! @ unaligned
ldr r1, [r6, #4] @ unaligned
ldr r2, [r6, #8] @ unaligned
ldr r3, [r6, #12] @ unaligned
ldr r6, [r7, #76]
- stmia r5!, {r0, r1, r2, r3}
- ldr r1, [r7, #84]
+ stmia r4!, {r0, r1, r2, r3}
+ mov r4, r9
+ ldr r1, [r7, #88]
+ str r9, [r7, #112]
vldr d18, [r1, #80]
vldr d19, [r1, #88]
veor q9, q9, q2
vstr d18, [r1, #80]
vstr d19, [r1, #88]
- ldr r3, [r7, #112]
- ldr r5, [r7, #80]
- mov r10, r3
- ldmia r10!, {r0, r1, r2, r3}
- str r0, [r4, #48] @ unaligned
- str r1, [r4, #52] @ unaligned
- str r2, [r4, #56] @ unaligned
- str r3, [r4, #60] @ unaligned
+ ldmia r9!, {r0, r1, r2, r3}
+ str r0, [r5, #48] @ unaligned
+ str r1, [r5, #52] @ unaligned
+ str r2, [r5, #56] @ unaligned
+ str r3, [r5, #60] @ unaligned
ldr r0, [r6, #64]! @ unaligned
ldr r1, [r6, #4] @ unaligned
ldr r2, [r6, #8] @ unaligned
ldr r3, [r6, #12] @ unaligned
ldr r6, [r7, #76]
- stmia r5!, {r0, r1, r2, r3}
- ldr r1, [r7, #84]
- ldr r3, [r7, #112]
- ldr r5, [r7, #80]
+ mov r9, r6
+ mov r6, r4
+ stmia r6!, {r0, r1, r2, r3}
+ mov r6, r4
+ ldr r1, [r7, #88]
vldr d18, [r1, #80]
vldr d19, [r1, #88]
veor q9, q9, q6
- mov r10, r3
- str r5, [r7, #20]
vstr d18, [r1, #80]
vstr d19, [r1, #88]
- ldmia r10!, {r0, r1, r2, r3}
- str r1, [r4, #68] @ unaligned
- str r2, [r4, #72] @ unaligned
- str r3, [r4, #76] @ unaligned
- str r0, [r4, #64] @ unaligned
- ldr r0, [r6, #80]! @ unaligned
- ldr r1, [r6, #4] @ unaligned
- ldr r2, [r6, #8] @ unaligned
- ldr r3, [r6, #12] @ unaligned
+ ldmia r4!, {r0, r1, r2, r3}
+ mov r4, r6
+ str r3, [r5, #76] @ unaligned
+ mov r3, r9
+ str r2, [r5, #72] @ unaligned
+ str r0, [r5, #64] @ unaligned
+ str r1, [r5, #68] @ unaligned
+ mov r5, r4
+ ldr r0, [r3, #80]! @ unaligned
+ mov r9, r3
+ ldr r1, [r9, #4] @ unaligned
+ ldr r2, [r9, #8] @ unaligned
+ ldr r3, [r9, #12] @ unaligned
+ mov r9, r4
ldr r6, [r7, #76]
+ str r9, [r7, #124]
stmia r5!, {r0, r1, r2, r3}
- ldr r1, [r7, #84]
- ldr r3, [r7, #20]
- ldr r5, [r7, #80]
+ mov r5, r8
+ ldr r1, [r7, #88]
vldr d18, [r1, #80]
vldr d19, [r1, #88]
veor q1, q9, q1
- mov r10, r3
vstr d2, [r1, #80]
vstr d3, [r1, #88]
- ldmia r10!, {r0, r1, r2, r3}
- mov r10, r5
- str r0, [r4, #80] @ unaligned
- str r1, [r4, #84] @ unaligned
- str r2, [r4, #88] @ unaligned
- str r3, [r4, #92] @ unaligned
+ ldmia r4!, {r0, r1, r2, r3}
+ mov r4, r9
+ str r0, [r8, #80] @ unaligned
+ str r1, [r8, #84] @ unaligned
+ str r2, [r8, #88] @ unaligned
+ str r3, [r8, #92] @ unaligned
ldr r0, [r6, #96]! @ unaligned
+ ldr r3, [r6, #12] @ unaligned
ldr r1, [r6, #4] @ unaligned
ldr r2, [r6, #8] @ unaligned
- ldr r3, [r6, #12] @ unaligned
ldr r6, [r7, #76]
- stmia r5!, {r0, r1, r2, r3}
- mov r5, r10
- ldr r3, [r7, #84]
+ stmia r4!, {r0, r1, r2, r3}
+ mov r4, r9
+ ldr r3, [r7, #88]
vldr d16, [r3, #80]
vldr d17, [r3, #88]
veor q8, q8, q7
vstr d16, [r3, #80]
vstr d17, [r3, #88]
- ldmia r10!, {r0, r1, r2, r3}
- str r0, [r4, #96] @ unaligned
- str r1, [r4, #100] @ unaligned
- str r2, [r4, #104] @ unaligned
- str r3, [r4, #108] @ unaligned
+ ldmia r9!, {r0, r1, r2, r3}
+ str r0, [r5, #96] @ unaligned
+ str r1, [r5, #100] @ unaligned
+ str r2, [r5, #104] @ unaligned
+ str r3, [r5, #108] @ unaligned
ldr r0, [r6, #112]! @ unaligned
ldr r1, [r6, #4] @ unaligned
ldr r2, [r6, #8] @ unaligned
ldr r3, [r6, #12] @ unaligned
- mov r6, r5
+ mov r6, r4
stmia r6!, {r0, r1, r2, r3}
- ldr r3, [r7, #84]
+ mov r6, r5
+ ldr r3, [r7, #88]
vldr d16, [r3, #80]
vldr d17, [r3, #88]
veor q8, q8, q3
vstr d16, [r3, #80]
vstr d17, [r3, #88]
- ldmia r5!, {r0, r1, r2, r3}
- str r1, [r4, #116] @ unaligned
- ldr r1, [r7, #76]
- str r0, [r4, #112] @ unaligned
- str r2, [r4, #120] @ unaligned
- str r3, [r4, #124] @ unaligned
- ldr r3, [r1, #128]
- ldr r2, [r7, #104]
+ ldmia r4!, {r0, r1, r2, r3}
+ mov r4, r5
+ mov r8, r4
+ str r2, [r5, #120] @ unaligned
+ ldr r2, [r7, #76]
+ str r0, [r5, #112] @ unaligned
+ str r1, [r5, #116] @ unaligned
+ str r3, [r5, #124] @ unaligned
+ ldr r3, [r2, #128]
+ ldr r1, [r7, #104]
eor r3, fp, r3
- str r3, [r4, #128]
- ldr r3, [r1, #132]
- eors r2, r2, r3
- str r2, [r8, #132]
- ldr r3, [r1, #136]
- ldr r5, [r7, #68]
- ldr r6, [r7, #32]
- eor r3, r9, r3
- str r3, [r4, #136]
- ldr r3, [r1, #140]
- ldr r0, [r7, #92]
- eors r3, r3, r6
- ldr r6, [r7, #108]
+ str r3, [r5, #128]
+ ldr r3, [r2, #132]
+ mov r5, r2
+ eor r3, r10, r3
+ str r3, [r6, #132]
+ ldr r3, [r2, #136]
+ mov r6, r5
+ eors r1, r1, r3
+ str r1, [r8, #136]
+ ldr r1, [r7, #56]
+ ldr r3, [r2, #140]
+ ldr r2, [r7, #100]
+ ldr r0, [r7, #108]
+ eors r3, r3, r2
str r3, [r4, #140]
- ldr r3, [r5]
- ldr r2, [r1, #144]
- add r6, r6, r3
- eors r2, r2, r6
+ ldr r3, [r1]
+ ldr r2, [r5, #144]
+ mov r8, r0
+ add r8, r8, r3
+ mov r5, r6
+ mov r3, r8
+ eors r2, r2, r3
str r2, [r4, #144]
- ldr r2, [r5, #4]
- ldr r3, [r1, #148]
- add r0, r0, r2
+ ldr r3, [r6, #148]
+ ldr r2, [r1, #4]
ldr r6, [r7, #36]
- eors r3, r3, r0
- ldr r0, [r7, #40]
- str r3, [r4, #148]
- ldr r2, [r5, #8]
- ldr r3, [r1, #152]
- add r0, r0, r2
- eors r3, r3, r0
- str r3, [r4, #152]
- ldr r2, [r5, #12]
- mov r0, r4
- ldr r3, [r1, #156]
- mov r4, r1
add r6, r6, r2
- mov r1, r0
eors r3, r3, r6
- str r3, [r0, #156]
- ldr r2, [r5, #16]
- ldr r3, [r4, #160]
+ mov r6, r1
+ str r3, [r4, #148]
+ ldr r2, [r1, #8]
+ ldr r1, [r7, #116]
+ ldr r3, [r5, #152]
+ mov r8, r1
+ add r8, r8, r2
+ ldr r1, [r7, #32]
+ mov r2, r8
+ eors r3, r3, r2
+ str r3, [r4, #152]
+ mov r8, r4
+ ldr r2, [r6, #12]
+ ldr r3, [r5, #156]
+ add r1, r1, r2
+ eors r3, r3, r1
+ str r3, [r4, #156]
+ ldr r2, [r6, #16]
+ mov r1, r4
+ ldr r3, [r5, #160]
+ mov r4, r5
add ip, ip, r2
+ mov r5, r6
eor r3, ip, r3
str r3, [r1, #160]
- ldr r2, [r5, #20]
+ ldr r2, [r6, #20]
ldr r3, [r4, #164]
add lr, lr, r2
- ldr r2, [r7, #116]
+ ldr r2, [r7, #92]
eor r3, lr, r3
str r3, [r1, #164]
ldr r6, [r5, #24]
mov lr, r4
ldr r3, [r4, #168]
add r2, r2, r6
- mov r6, r4
+ ldr r6, [r7, #120]
eors r3, r3, r2
str r3, [r1, #168]
ldr r5, [r5, #28]
- mov r2, r1
ldr r3, [r4, #172]
- ldr r0, [r7, #120]
- add r0, r0, r5
- ldr r5, [r7, #24]
- eors r3, r3, r0
+ add r6, r6, r5
+ eors r3, r3, r6
str r3, [r1, #172]
- ldr r3, [r7, #72]
ldr r4, [r4, #176]
- ldr r1, [r7, #28]
- eors r4, r4, r1
- adds r1, r3, #3
- str r4, [r2, #176]
- ldr r3, [r7, #100]
+ ldr r0, [r7, #28]
+ ldr r5, [r7, #24]
+ eors r4, r4, r0
+ str r4, [r8, #176]
ldr r0, [lr, #180]
- str r1, [r7, #72]
- eors r3, r3, r0
- mov r0, r3
- mov r3, r2
- str r0, [r2, #180]
- adds r3, r3, #192
- ldr r1, [lr, #184]
ldr r2, [r7, #96]
+ eors r0, r0, r5
+ str r0, [r8, #180]
+ ldr r1, [lr, #184]
+ ldr r4, [r7, #20]
eors r1, r1, r2
- str r1, [r3, #-8]
+ str r1, [r8, #184]
ldr r2, [lr, #188]
- mov r1, r6
- adds r1, r1, #192
- str r1, [r7, #76]
- eors r2, r2, r5
- str r2, [r3, #-4]
+ add r1, lr, #192
+ ldr r3, [r7, #72]
+ eors r2, r2, r4
+ str r2, [r8, #188]
ldr r2, [r7, #16]
- str r3, [r7, #88]
+ adds r3, r3, #3
+ str r3, [r7, #72]
+ mov r3, r8
+ adds r3, r3, #192
+ str r1, [r7, #76]
cmp r2, r3
+ str r3, [r7, #80]
bne .L4
ldr r3, [r7, #12]
ldr r2, [r7, #4]
@@ -757,8 +775,8 @@
beq .L6
ldr r5, [r7, #12]
ldr r4, [r7, #16]
- ldr r6, [r7, #84]
- ldr lr, [r7, #80]
+ ldr r6, [r7, #88]
+ ldr lr, [r7, #84]
vldr d30, .L94
vldr d31, .L94+8
str fp, [r7, #120]
@@ -964,7 +982,7 @@
mov r9, r5
bhi .L88
vadd.i32 q12, q12, q10
- ldr r3, [r7, #84]
+ ldr r3, [r7, #88]
vst1.64 {d24-d25}, [r3:128]
.L14:
ldr r3, [r7, #8]
@@ -1001,7 +1019,7 @@
movcs r1, ip
cmp r1, #0
beq .L17
- ldr r5, [r7, #84]
+ ldr r5, [r7, #88]
cmp r1, #1
ldrb r0, [r0] @ zero_extendqisi2
add r3, r2, #1
@@ -1136,7 +1154,7 @@
ldr r5, [r7, #16]
cmp r6, #1
add r0, r1, r2
- ldr r1, [r7, #84]
+ ldr r1, [r7, #88]
add r1, r1, r2
vld1.64 {d18-d19}, [r0:64]
add r2, r2, r5
@@ -1174,7 +1192,7 @@
add r3, r3, lr
beq .L1
.L19:
- ldr r4, [r7, #84]
+ ldr r4, [r7, #88]
adds r2, r3, #1
ldr r1, [r7, #12]
cmp r2, r9
@@ -1289,7 +1307,7 @@
eor r1, r1, r0
strb r1, [r5, r2]
bls .L1
- ldr r2, [r7, #84]
+ ldr r2, [r7, #88]
ldrb r1, [r2, r3] @ zero_extendqisi2
ldr r2, [r7, #12]
ldrb r2, [r2, r3] @ zero_extendqisi2
@@ -1297,7 +1315,7 @@
ldr r1, [r7, #16]
strb r2, [r1, r3]
.L1:
- adds r7, r7, #156
+ adds r7, r7, #164
mov sp, r7
@ sp needed
vldm sp!, {d8-d15}
@@ -1305,7 +1323,7 @@
.L88:
ldr r5, [r7, #12]
vadd.i32 q12, q12, q10
- ldr r4, [r7, #80]
+ ldr r4, [r7, #84]
cmp r9, #31
ldr r0, [r5] @ unaligned
ldr r1, [r5, #4] @ unaligned
@@ -1313,7 +1331,7 @@
ldr r2, [r5, #8] @ unaligned
ldr r3, [r5, #12] @ unaligned
stmia r6!, {r0, r1, r2, r3}
- ldr r2, [r7, #84]
+ ldr r2, [r7, #88]
ldr r6, [r7, #16]
vldr d18, [r2, #80]
vldr d19, [r2, #88]
@@ -1328,7 +1346,7 @@
str r3, [r6, #12] @ unaligned
bhi .L89
vadd.i32 q13, q13, q15
- ldr r3, [r7, #84]
+ ldr r3, [r7, #88]
vstr d26, [r3, #16]
vstr d27, [r3, #24]
b .L14
@@ -1337,7 +1355,7 @@
ldr r2, [r7, #12]
add r2, r2, r9
mov r5, r2
- ldr r2, [r7, #84]
+ ldr r2, [r7, #88]
add r2, r2, r3
mov r3, r2
.L24:
@@ -1347,17 +1365,18 @@
eor r2, r2, r1
strb r2, [r4], #1
bne .L24
- adds r7, r7, #156
+ adds r7, r7, #164
mov sp, r7
@ sp needed
vldm sp!, {d8-d15}
pop {r4, r5, r6, r7, r8, r9, r10, fp, pc}
.L26:
- str fp, [r7, #16]
+ ldr r3, [r7, #80]
+ str r3, [r7, #16]
b .L2
.L89:
mov r3, r5
- ldr r4, [r7, #80]
+ ldr r4, [r7, #84]
ldr r0, [r3, #16]! @ unaligned
add lr, r1, #16
mov r5, r1
@@ -1368,7 +1387,7 @@
ldr r2, [r3, #8] @ unaligned
ldr r3, [r3, #12] @ unaligned
stmia r6!, {r0, r1, r2, r3}
- ldr r2, [r7, #84]
+ ldr r2, [r7, #88]
vldr d18, [r2, #80]
vldr d19, [r2, #88]
veor q13, q9, q13
@@ -1381,16 +1400,16 @@
str r3, [lr, #12] @ unaligned
bhi .L90
vadd.i32 q8, q14, q8
- ldr r3, [r7, #84]
+ ldr r3, [r7, #88]
vstr d16, [r3, #32]
vstr d17, [r3, #40]
b .L14
.L90:
ldr r3, [r7, #12]
add lr, r5, #32
- ldr r4, [r7, #80]
+ ldr r4, [r7, #84]
vadd.i32 q8, q14, q8
- ldr r5, [r7, #84]
+ ldr r5, [r7, #88]
vadd.i32 q11, q11, q3
ldr r0, [r3, #32]! @ unaligned
mov r6, r4
diff --git a/crypto/cipher/e_chacha20poly1305.c b/crypto/cipher/e_chacha20poly1305.c
index 9dda1b0..34446b4 100644
--- a/crypto/cipher/e_chacha20poly1305.c
+++ b/crypto/cipher/e_chacha20poly1305.c
@@ -26,7 +26,6 @@
#define POLY1305_TAG_LEN 16
-#define CHACHA20_NONCE_LEN 8
struct aead_chacha20_poly1305_ctx {
unsigned char key[32];
@@ -99,8 +98,7 @@
poly1305_state poly1305;
const uint64_t in_len_64 = in_len;
- /* The underlying ChaCha implementation may not overflow the block
- * counter into the second counter word. Therefore we disallow
+ /* |CRYPTO_chacha_20| uses a 32-bit block counter. Therefore we disallow
* individual operations that work on more than 256GB at a time.
* |in_len_64| is needed because, on 32-bit platforms, size_t is only
* 32-bits and this produces a warning because it's always false.
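
(The 256GB figure follows directly from the 32-bit counter: 2^32
blocks * 64 bytes per block = 2^38 bytes = 256GiB per (key, nonce)
pair.)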
@@ -121,18 +119,21 @@
return 0;
}
- if (nonce_len != CHACHA20_NONCE_LEN) {
+ if (nonce_len != 8) {
OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_IV_TOO_LARGE);
return 0;
}
+ uint8_t nonce_96[12];
+ memset(nonce_96, 0, 4);
+ memcpy(nonce_96 + 4, nonce, 8);
memset(poly1305_key, 0, sizeof(poly1305_key));
CRYPTO_chacha_20(poly1305_key, poly1305_key, sizeof(poly1305_key),
- c20_ctx->key, nonce, 0);
+ c20_ctx->key, nonce_96, 0);
CRYPTO_poly1305_init(&poly1305, poly1305_key);
poly1305_update_with_length(&poly1305, ad, ad_len);
- CRYPTO_chacha_20(out, in, in_len, c20_ctx->key, nonce, 1);
+ CRYPTO_chacha_20(out, in, in_len, c20_ctx->key, nonce_96, 1);
poly1305_update_with_length(&poly1305, out, in_len);
uint8_t tag[POLY1305_TAG_LEN] ALIGNED;
@@ -159,8 +160,7 @@
return 0;
}
- /* The underlying ChaCha implementation may not overflow the block
- * counter into the second counter word. Therefore we disallow
+ /* |CRYPTO_chacha_20| uses a 32-bit block counter. Therefore we disallow
* individual operations that work on more than 256GB at a time.
* |in_len_64| is needed because, on 32-bit platforms, size_t is only
* 32-bits and this produces a warning because it's always false.
@@ -171,10 +171,13 @@
return 0;
}
- if (nonce_len != CHACHA20_NONCE_LEN) {
+ if (nonce_len != 8) {
OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_IV_TOO_LARGE);
return 0;
}
+ uint8_t nonce_96[12];
+ memset(nonce_96, 0, 4);
+ memcpy(nonce_96 + 4, nonce, 8);
plaintext_len = in_len - c20_ctx->tag_len;
@@ -185,7 +188,7 @@
memset(poly1305_key, 0, sizeof(poly1305_key));
CRYPTO_chacha_20(poly1305_key, poly1305_key, sizeof(poly1305_key),
- c20_ctx->key, nonce, 0);
+ c20_ctx->key, nonce_96, 0);
CRYPTO_poly1305_init(&poly1305, poly1305_key);
poly1305_update_with_length(&poly1305, ad, ad_len);
@@ -197,14 +200,14 @@
return 0;
}
- CRYPTO_chacha_20(out, in, plaintext_len, c20_ctx->key, nonce, 1);
+ CRYPTO_chacha_20(out, in, plaintext_len, c20_ctx->key, nonce_96, 1);
*out_len = plaintext_len;
return 1;
}
static const EVP_AEAD aead_chacha20_poly1305 = {
32, /* key len */
- CHACHA20_NONCE_LEN, /* nonce len */
+ 8, /* nonce len */
POLY1305_TAG_LEN, /* overhead */
POLY1305_TAG_LEN, /* max tag length */
aead_chacha20_poly1305_init,
diff --git a/crypto/rand/rand.c b/crypto/rand/rand.c
index 8076b78..8b11728 100644
--- a/crypto/rand/rand.c
+++ b/crypto/rand/rand.c
@@ -159,17 +159,21 @@
if (todo > kMaxBytesPerCall) {
todo = kMaxBytesPerCall;
}
- CRYPTO_chacha_20(buf, buf, todo, state->key,
- (uint8_t *)&state->calls_used, 0);
+ uint8_t nonce[12];
+ memset(nonce, 0, 4);
+ memcpy(nonce + 4, &state->calls_used, sizeof(state->calls_used));
+ CRYPTO_chacha_20(buf, buf, todo, state->key, nonce, 0);
buf += todo;
remaining -= todo;
state->calls_used++;
}
} else {
if (sizeof(state->partial_block) - state->partial_block_used < len) {
+ uint8_t nonce[12];
+ memset(nonce, 0, 4);
+ memcpy(nonce + 4, &state->calls_used, sizeof(state->calls_used));
CRYPTO_chacha_20(state->partial_block, state->partial_block,
- sizeof(state->partial_block), state->key,
- (uint8_t *)&state->calls_used, 0);
+ sizeof(state->partial_block), state->key, nonce, 0);
state->partial_block_used = 0;
}
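
As in the AEAD code, the RNG keeps its existing 64-bit |calls_used|
counter as the nonce source and zero-prefixes it to 96 bits, so the
keystream matches what the old 64-bit-nonce call produced for the same
state.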
diff --git a/include/openssl/chacha.h b/include/openssl/chacha.h
index b7f5882..64713c2 100644
--- a/include/openssl/chacha.h
+++ b/include/openssl/chacha.h
@@ -27,7 +27,7 @@
* initial block counter is specified by |counter|. */
OPENSSL_EXPORT void CRYPTO_chacha_20(uint8_t *out, const uint8_t *in,
size_t in_len, const uint8_t key[32],
- const uint8_t nonce[8], size_t counter);
+ const uint8_t nonce[12], uint32_t counter);
#if defined(__cplusplus)
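
With the new prototype, a caller supplies a 12-byte nonce and a 32-bit
initial block counter directly. A hypothetical caller, shown only to
illustrate the updated signature:

    #include <stddef.h>
    #include <stdint.h>

    #include <openssl/chacha.h>

    static void encrypt_example(uint8_t *out, const uint8_t *in,
                                size_t in_len, const uint8_t key[32]) {
      /* 96-bit nonce; must be unique per key in real use. */
      const uint8_t nonce[12] = {0};
      /* Start the 32-bit block counter at zero. */
      CRYPTO_chacha_20(out, in, in_len, key, nonce, 0);
    }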