Tidy up poly1305 alignment a bit

poly1305_vec.cc was not using the align_pointer helper. poly1305_arm.cc was a bit of a mess. It had static asserts that we could align the overall struct, but it instead only aligned the array. But there's a size_t there, so actually the whole thing needs to be aligned.

Change-Id: Icf11544a83b2388a94ca54cda59a5a86f2708991
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/94787
Presubmit-BoringSSL-Verified: boringssl-scoped@luci-project-accounts.iam.gserviceaccount.com <boringssl-scoped@luci-project-accounts.iam.gserviceaccount.com>
Auto-Submit: David Benjamin <davidben@google.com>
Commit-Queue: Lily Chen <chlily@google.com>
Reviewed-by: Lily Chen <chlily@google.com>
diff --git a/crypto/poly1305/poly1305_arm.cc b/crypto/poly1305/poly1305_arm.cc index 210715c..6620d70 100644 --- a/crypto/poly1305/poly1305_arm.cc +++ b/crypto/poly1305/poly1305_arm.cc
@@ -39,7 +39,7 @@ extern void addmulmod(fe1305x2 *r, const fe1305x2 *x, const fe1305x2 *y, const fe1305x2 *c); -extern int blocks(fe1305x2 *h, const fe1305x2 *precomp, const uint8_t *in, +extern int blocks(fe1305x2 *h, const fe1305x2 precomp[2], const uint8_t *in, size_t inlen); } @@ -108,16 +108,6 @@ r->v[8] = y4; } -static void store32(uint8_t out[4], uint32_t v) { OPENSSL_memcpy(out, &v, 4); } - -// load32 exists to avoid breaking strict aliasing rules in -// fe1305x2_frombytearray. -static uint32_t load32(const uint8_t t[4]) { - uint32_t tmp; - OPENSSL_memcpy(&tmp, t, sizeof(tmp)); - return tmp; -} - static void fe1305x2_tobytearray(uint8_t r[16], fe1305x2 *x) { uint32_t x0 = x->v[0]; uint32_t x1 = x->v[2]; @@ -134,10 +124,10 @@ x4 += x3 >> 26; x3 &= 0x3ffffff; - store32(r, x0 + (x1 << 26)); - store32(r + 4, (x1 >> 6) + (x2 << 20)); - store32(r + 8, (x2 >> 12) + (x3 << 14)); - store32(r + 12, (x3 >> 18) + (x4 << 8)); + CRYPTO_store_u32_le(r, x0 + (x1 << 26)); + CRYPTO_store_u32_le(r + 4, (x1 >> 6) + (x2 << 20)); + CRYPTO_store_u32_le(r + 8, (x2 >> 12) + (x3 << 14)); + CRYPTO_store_u32_le(r + 12, (x3 >> 18) + (x4 << 8)); } static void fe1305x2_frombytearray(fe1305x2 *r, const uint8_t *x, size_t xlen) { @@ -154,11 +144,11 @@ t[i] = 0; } - r->v[0] = 0x3ffffff & load32(t); - r->v[2] = 0x3ffffff & (load32(t + 3) >> 2); - r->v[4] = 0x3ffffff & (load32(t + 6) >> 4); - r->v[6] = 0x3ffffff & (load32(t + 9) >> 6); - r->v[8] = load32(t + 13); + r->v[0] = 0x3ffffff & CRYPTO_load_u32_le(t); + r->v[2] = 0x3ffffff & (CRYPTO_load_u32_le(t + 3) >> 2); + r->v[4] = 0x3ffffff & (CRYPTO_load_u32_le(t + 6) >> 4); + r->v[6] = 0x3ffffff & (CRYPTO_load_u32_le(t + 9) >> 6); + r->v[8] = CRYPTO_load_u32_le(t + 13); if (xlen) { for (i = 0; (i < 16) && (i < xlen); i++) { @@ -169,11 +159,11 @@ t[i] = 0; } - r->v[1] = 0x3ffffff & load32(t); - r->v[3] = 0x3ffffff & (load32(t + 3) >> 2); - r->v[5] = 0x3ffffff & (load32(t + 6) >> 4); - r->v[7] = 0x3ffffff & (load32(t + 9) >> 6); - r->v[9] = 
load32(t + 13); + r->v[1] = 0x3ffffff & CRYPTO_load_u32_le(t); + r->v[3] = 0x3ffffff & (CRYPTO_load_u32_le(t + 3) >> 2); + r->v[5] = 0x3ffffff & (CRYPTO_load_u32_le(t + 6) >> 4); + r->v[7] = 0x3ffffff & (CRYPTO_load_u32_le(t + 9) >> 6); + r->v[9] = CRYPTO_load_u32_le(t + 13); } else { r->v[1] = r->v[3] = r->v[5] = r->v[7] = r->v[9] = 0; } @@ -182,7 +172,7 @@ static const fe1305x2 zero alignas(16) = {0}; struct poly1305_state_st { - uint8_t data[sizeof(fe1305x2[5]) + 128]; + fe1305x2 r, h, c, precomp[2]; uint8_t buf[32]; size_t buf_used; uint8_t key[16]; @@ -192,18 +182,21 @@ sizeof(struct poly1305_state_st) + 63 <= sizeof(poly1305_state), "poly1305_state isn't large enough to hold aligned poly1305_state_st."); -void CRYPTO_poly1305_init_neon(poly1305_state *state, const uint8_t key[32]) { - struct poly1305_state_st *st = (struct poly1305_state_st *)(state); - fe1305x2 *const r = (fe1305x2 *)(st->data + (15 & (-(int)st->data))); - fe1305x2 *const h = r + 1; - fe1305x2 *const c = h + 1; - fe1305x2 *const precomp = c + 1; +static poly1305_state_st *poly1305_aligned_state(poly1305_state *state) { + return reinterpret_cast<poly1305_state_st *>(align_pointer(state, 64)); +} - r->v[1] = r->v[0] = 0x3ffffff & load32(key); - r->v[3] = r->v[2] = 0x3ffff03 & (load32(key + 3) >> 2); - r->v[5] = r->v[4] = 0x3ffc0ff & (load32(key + 6) >> 4); - r->v[7] = r->v[6] = 0x3f03fff & (load32(key + 9) >> 6); - r->v[9] = r->v[8] = 0x00fffff & (load32(key + 12) >> 8); +void CRYPTO_poly1305_init_neon(poly1305_state *state, const uint8_t key[32]) { + poly1305_state_st *st = poly1305_aligned_state(state); + fe1305x2 *const r = &st->r; + fe1305x2 *const h = &st->h; + fe1305x2 *const precomp = st->precomp; + + r->v[1] = r->v[0] = 0x3ffffff & CRYPTO_load_u32_le(key); + r->v[3] = r->v[2] = 0x3ffff03 & (CRYPTO_load_u32_le(key + 3) >> 2); + r->v[5] = r->v[4] = 0x3ffc0ff & (CRYPTO_load_u32_le(key + 6) >> 4); + r->v[7] = r->v[6] = 0x3f03fff & (CRYPTO_load_u32_le(key + 9) >> 6); + r->v[9] = r->v[8] = 
0x00fffff & (CRYPTO_load_u32_le(key + 12) >> 8); for (size_t j = 0; j < 10; j++) { h->v[j] = 0; // XXX: should fast-forward a bit @@ -218,11 +211,10 @@ void CRYPTO_poly1305_update_neon(poly1305_state *state, const uint8_t *in, size_t in_len) { - struct poly1305_state_st *st = (struct poly1305_state_st *)(state); - fe1305x2 *const r = (fe1305x2 *)(st->data + (15 & (-(int)st->data))); - fe1305x2 *const h = r + 1; - fe1305x2 *const c = h + 1; - fe1305x2 *const precomp = c + 1; + poly1305_state_st *st = poly1305_aligned_state(state); + fe1305x2 *const h = &st->h; + fe1305x2 *const c = &st->c; + fe1305x2 *const precomp = st->precomp; if (st->buf_used) { size_t todo = 32 - st->buf_used; @@ -265,11 +257,11 @@ } void CRYPTO_poly1305_finish_neon(poly1305_state *state, uint8_t mac[16]) { - struct poly1305_state_st *st = (struct poly1305_state_st *)(state); - fe1305x2 *const r = (fe1305x2 *)(st->data + (15 & (-(int)st->data))); - fe1305x2 *const h = r + 1; - fe1305x2 *const c = h + 1; - fe1305x2 *const precomp = c + 1; + poly1305_state_st *st = poly1305_aligned_state(state); + fe1305x2 *const r = &st->r; + fe1305x2 *const h = &st->h; + fe1305x2 *const c = &st->c; + fe1305x2 *const precomp = st->precomp; addmulmod(h, h, precomp, &zero);
diff --git a/crypto/poly1305/poly1305_vec.cc b/crypto/poly1305/poly1305_vec.cc index 100bfb7..20f77f2 100644 --- a/crypto/poly1305/poly1305_vec.cc +++ b/crypto/poly1305/poly1305_vec.cc
@@ -85,7 +85,7 @@ "poly1305_state_internal_t"); poly1305_state_internal *poly1305_aligned_state(poly1305_state *state) { - return (poly1305_state_internal *)(((uint64_t)state + 63) & ~63); + return reinterpret_cast<poly1305_state_internal *>(align_pointer(state, 64)); } size_t poly1305_min(size_t a, size_t b) { return (a < b) ? a : b; }