Use |alignas| for alignment.

MSVC doesn't have stdalign.h and so doesn't support |alignas| in C
code. Define |alignas(x)| as a synonym for |__declspec(align(x))|
for MSVC instead.

This also fixes -Wcast-qual warnings in rsaz_exp.c.

Change-Id: Ifce9031724cb93f5a4aa1f567e7af61b272df9d5
Reviewed-on: https://boringssl-review.googlesource.com/6924
Reviewed-by: Adam Langley <agl@google.com>
Reviewed-by: David Benjamin <davidben@google.com>
diff --git a/crypto/bn/rsaz_exp.c b/crypto/bn/rsaz_exp.c
index c802752..30f08e5 100644
--- a/crypto/bn/rsaz_exp.c
+++ b/crypto/bn/rsaz_exp.c
@@ -48,6 +48,9 @@
 
 #include <openssl/mem.h>
 
+#include "../internal.h"
+
+
 /*
  * See crypto/bn/asm/rsaz-avx2.pl for further details.
  */
@@ -58,42 +61,30 @@
 void rsaz_1024_gather5_avx2(void *val,const void *tbl,int i);
 void rsaz_1024_red2norm_avx2(void *norm,const void *red);
 
-#if defined(__GNUC__)
-# define ALIGN64	__attribute__((aligned(64)))
-#elif defined(_MSC_VER)
-# define ALIGN64	__declspec(align(64))
-#elif defined(__SUNPRO_C)
-# define ALIGN64
-# pragma align 64(one,two80)
-#else
-# define ALIGN64	/* not fatal, might hurt performance a little */
-#endif
-
-ALIGN64 static const BN_ULONG one[40] =
+alignas(64) static const BN_ULONG one[40] =
 	{1,0,0,    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-ALIGN64 static const BN_ULONG two80[40] =
+alignas(64) static const BN_ULONG two80[40] =
 	{0,0,1<<22,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
 
 void RSAZ_1024_mod_exp_avx2(BN_ULONG result_norm[16],
 	const BN_ULONG base_norm[16], const BN_ULONG exponent[16],
 	const BN_ULONG m_norm[16], const BN_ULONG RR[16], BN_ULONG k0)
 {
-	unsigned char	 storage[320*3+32*9*16+64];	/* 5.5KB */
-	unsigned char	*p_str = storage + (64-((size_t)storage%64));
+	alignas(64) uint8_t storage[(320 * 3) + (32 * 9 * 16)]; /* 5.5KB */
 	unsigned char	*a_inv, *m, *result,
-			*table_s = p_str+320*3,
+			*table_s = storage + (320 * 3),
 			*R2      = table_s;	/* borrow */
 	int index;
 	int wvalue;
 
-	if ((((size_t)p_str&4095)+320)>>12) {
-		result = p_str;
-		a_inv = p_str + 320;
-		m = p_str + 320*2;	/* should not cross page */
+	if (((((uintptr_t)storage & 4095) + 320) >> 12) != 0) {
+		result = storage;
+		a_inv = storage + 320;
+		m = storage + (320 * 2); /* should not cross page */
 	} else {
-		m = p_str;		/* should not cross page */
-		result = p_str + 320;
-		a_inv = p_str + 320*2;
+		m = storage;		/* should not cross page */
+		result = storage + 320;
+		a_inv = storage + (320 * 2);
 	}
 
 	rsaz_1024_norm2red_avx2(m, m_norm);
@@ -224,8 +215,9 @@
 	rsaz_1024_scatter5_avx2(table_s,result,31);
 #endif
 
+	const uint8_t *p_str = (const uint8_t *)exponent;
+
 	/* load first window */
-	p_str = (unsigned char*)exponent;
 	wvalue = p_str[127] >> 3;
 	rsaz_1024_gather5_avx2(result,table_s,wvalue);
 
@@ -235,7 +227,7 @@
 
 		rsaz_1024_sqr_avx2(result, result, m, k0, 5);
 
-		wvalue = *((unsigned short*)&p_str[index/8]);
+		wvalue = *((const unsigned short*)&p_str[index / 8]);
 		wvalue = (wvalue>> (index%8)) & 31;
 		index-=5;
 
@@ -274,11 +266,10 @@
 	const BN_ULONG base[8], const BN_ULONG exponent[8],
 	const BN_ULONG m[8], BN_ULONG k0, const BN_ULONG RR[8])
 {
-	unsigned char	 storage[16*8*8+64*2+64];	/* 1.2KB */
-	unsigned char	*table = storage + (64-((size_t)storage%64));
+	alignas(64) uint8_t storage[(16*8*8) + (64 * 2)]; /* 1.2KB */
+	unsigned char	*table = storage;
 	BN_ULONG	*a_inv = (BN_ULONG *)(table+16*8*8),
 			*temp  = (BN_ULONG *)(table+16*8*8+8*8);
-	unsigned char	*p_str = (unsigned char*)exponent;
 	int index;
 	unsigned int wvalue;
 
@@ -300,6 +291,8 @@
 	for (index=3; index<16; index++)
 		rsaz_512_mul_scatter4(temp, a_inv, m, k0, table, index);
 
+	const uint8_t *p_str = (const uint8_t *)exponent;
+
 	/* load first window */
 	wvalue = p_str[63];
 
diff --git a/crypto/chacha/chacha_vec.c b/crypto/chacha/chacha_vec.c
index 79ec941..1de59ff 100644
--- a/crypto/chacha/chacha_vec.c
+++ b/crypto/chacha/chacha_vec.c
@@ -25,6 +25,9 @@
 
 #include <openssl/chacha.h>
 
+#include "../internal.h"
+
+
 #if defined(ASM_GEN) ||          \
     !defined(OPENSSL_WINDOWS) && \
         (defined(OPENSSL_X86_64) || defined(OPENSSL_X86)) && defined(__SSE2__)
@@ -163,10 +166,10 @@
 	const unsigned *kp = (const unsigned *)key;
 #if defined(__ARM_NEON__)
 	uint32_t np[3];
-	uint8_t alignment_buffer[16] __attribute__((aligned(16)));
+	alignas(16) uint8_t alignment_buffer[16];
 #endif
 	vec s0, s1, s2, s3;
-	__attribute__ ((aligned (16))) unsigned chacha_const[] =
+	alignas(16) unsigned chacha_const[] =
 		{0x61707865,0x3320646E,0x79622D32,0x6B206574};
 #if defined(__ARM_NEON__)
 	memcpy(np, nonce, 12);
diff --git a/crypto/cipher/e_chacha20poly1305.c b/crypto/cipher/e_chacha20poly1305.c
index f384950..de8c9b4 100644
--- a/crypto/cipher/e_chacha20poly1305.c
+++ b/crypto/cipher/e_chacha20poly1305.c
@@ -23,6 +23,7 @@
 #include <openssl/poly1305.h>
 
 #include "internal.h"
+#include "../internal.h"
 
 
 #define POLY1305_TAG_LEN 16
@@ -79,12 +80,6 @@
   CRYPTO_poly1305_update(poly1305, length_bytes, sizeof(length_bytes));
 }
 
-#if defined(__arm__)
-#define ALIGNED __attribute__((aligned(16)))
-#else
-#define ALIGNED
-#endif
-
 typedef void (*aead_poly1305_update)(poly1305_state *ctx, const uint8_t *ad,
                                      size_t ad_len, const uint8_t *ciphertext,
                                      size_t ciphertext_len);
@@ -98,7 +93,7 @@
                           const uint8_t nonce[12], const uint8_t *ad,
                           size_t ad_len, const uint8_t *ciphertext,
                           size_t ciphertext_len) {
-  uint8_t poly1305_key[32] ALIGNED;
+  alignas(16) uint8_t poly1305_key[32];
   memset(poly1305_key, 0, sizeof(poly1305_key));
   CRYPTO_chacha_20(poly1305_key, poly1305_key, sizeof(poly1305_key),
                    c20_ctx->key, nonce, 0);
@@ -139,7 +134,7 @@
 
   CRYPTO_chacha_20(out, in, in_len, c20_ctx->key, nonce, 1);
 
-  uint8_t tag[POLY1305_TAG_LEN] ALIGNED;
+  alignas(16) uint8_t tag[POLY1305_TAG_LEN];
   aead_poly1305(poly1305_update, tag, c20_ctx, nonce, ad, ad_len, out, in_len);
 
   memcpy(out + in_len, tag, c20_ctx->tag_len);
@@ -173,7 +168,7 @@
   }
 
   plaintext_len = in_len - c20_ctx->tag_len;
-  uint8_t tag[POLY1305_TAG_LEN] ALIGNED;
+  alignas(16) uint8_t tag[POLY1305_TAG_LEN];
   aead_poly1305(poly1305_update, tag, c20_ctx, nonce, ad, ad_len, in,
                 plaintext_len);
   if (CRYPTO_memcmp(tag, in + plaintext_len, c20_ctx->tag_len) != 0) {
diff --git a/crypto/ec/p256-x86_64-table.h b/crypto/ec/p256-x86_64-table.h
index 5b3254c..e4705f8 100644
--- a/crypto/ec/p256-x86_64-table.h
+++ b/crypto/ec/p256-x86_64-table.h
@@ -24,12 +24,7 @@
  * in order to increase the chances of using a large page but that appears to
  * lead to invalid ELF files being produced. */
 
-#if defined(__GNUC__)
-__attribute((aligned(4096)))
-#elif defined(_MSC_VER)
-__declspec(align(4096))
-#endif
-static const BN_ULONG
+static const alignas(4096) BN_ULONG
     ecp_nistz256_precomputed[37][64 * sizeof(P256_POINT_AFFINE) /
                                  sizeof(BN_ULONG)] = {
         {TOBN(0x79e730d4, 0x18a9143c), TOBN(0x75ba95fc, 0x5fedb601),
diff --git a/crypto/ec/p256-x86_64.c b/crypto/ec/p256-x86_64.c
index 2f7023d..2972483 100644
--- a/crypto/ec/p256-x86_64.c
+++ b/crypto/ec/p256-x86_64.c
@@ -39,15 +39,6 @@
     !defined(OPENSSL_SMALL)
 
 
-#if defined(__GNUC__)
-#define ALIGN(x) __attribute((aligned(x)))
-#elif defined(_MSC_VER)
-#define ALIGN(x) __declspec(align(x))
-#else
-#define ALIGN(x)
-#endif
-
-#define ALIGNPTR(p, N) ((uint8_t *)p + N - (size_t)p % N)
 #define P256_LIMBS (256 / BN_BITS2)
 
 typedef struct {
@@ -245,7 +236,7 @@
   /* A |P256_POINT| is (3 * 32) = 96 bytes, and the 64-byte alignment should
    * add no more than 63 bytes of overhead. Thus, |table| should require
    * ~1599 ((96 * 16) + 63) bytes of stack space. */
-  ALIGN(64) P256_POINT table[16];
+  alignas(64) P256_POINT table[16];
   uint8_t p_str[33];
 
 
@@ -326,7 +317,7 @@
   ecp_nistz256_point_add(&row[16 - 1], &row[15 - 1], &row[1 - 1]);
 
   BN_ULONG tmp[P256_LIMBS];
-  ALIGN(32) P256_POINT h;
+  alignas(32) P256_POINT h;
   unsigned index = 255;
   unsigned wvalue = p_str[(index - 1) / 8];
   wvalue = (wvalue >> ((index - 1) % 8)) & kMask;
@@ -390,7 +381,7 @@
   static const unsigned kWindowSize = 7;
   static const unsigned kMask = (1 << (7 /* kWindowSize */ + 1)) - 1;
 
-  ALIGN(32) union {
+  alignas(32) union {
     P256_POINT p;
     P256_POINT_AFFINE a;
   } t, p;
diff --git a/crypto/internal.h b/crypto/internal.h
index bf45349..fe4ed73 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -112,6 +112,15 @@
 #include <openssl/ex_data.h>
 #include <openssl/thread.h>
 
+#if defined(_MSC_VER)
+#if !defined(__cplusplus) || _MSC_VER < 1900
+#define alignas(x) __declspec(align(x))
+#define alignof __alignof
+#endif
+#else
+#include <stdalign.h>
+#endif
+
 #if defined(OPENSSL_NO_THREADS)
 #elif defined(OPENSSL_WINDOWS)
 #pragma warning(push, 3)
diff --git a/crypto/poly1305/poly1305_arm.c b/crypto/poly1305/poly1305_arm.c
index 5e78dc0..82876e1 100644
--- a/crypto/poly1305/poly1305_arm.c
+++ b/crypto/poly1305/poly1305_arm.c
@@ -21,6 +21,8 @@
 
 #include <string.h>
 
+#include "../internal.h"
+
 
 typedef struct {
   uint32_t v[12]; /* for alignment; only using 10 */
@@ -170,7 +172,7 @@
   }
 }
 
-static const fe1305x2 zero __attribute__((aligned(16)));
+static const alignas(16) fe1305x2 zero;
 
 struct poly1305_state_st {
   uint8_t data[sizeof(fe1305x2[5]) + 128];
diff --git a/crypto/poly1305/poly1305_test.cc b/crypto/poly1305/poly1305_test.cc
index 3a72668..cae30a4 100644
--- a/crypto/poly1305/poly1305_test.cc
+++ b/crypto/poly1305/poly1305_test.cc
@@ -20,17 +20,10 @@
 #include <openssl/crypto.h>
 #include <openssl/poly1305.h>
 
+#include "../internal.h"
 #include "../test/file_test.h"
 
 
-// |CRYPTO_poly1305_finish| requires a 16-byte-aligned output.
-#if defined(OPENSSL_WINDOWS)
-// MSVC doesn't support C++11 |alignas|.
-#define ALIGNED __declspec(align(16))
-#else
-#define ALIGNED alignas(16)
-#endif
-
 static bool TestPoly1305(FileTest *t, void *arg) {
   std::vector<uint8_t> key, in, mac;
   if (!t->GetBytes(&key, "Key") ||
@@ -47,7 +40,8 @@
   poly1305_state state;
   CRYPTO_poly1305_init(&state, key.data());
   CRYPTO_poly1305_update(&state, in.data(), in.size());
-  ALIGNED uint8_t out[16];
+  // |CRYPTO_poly1305_finish| requires a 16-byte-aligned output.
+  alignas(16) uint8_t out[16];
   CRYPTO_poly1305_finish(&state, out);
   if (!t->ExpectBytesEqual(out, 16, mac.data(), mac.size())) {
     t->PrintLine("Single-shot Poly1305 failed.");
diff --git a/crypto/poly1305/poly1305_vec.c b/crypto/poly1305/poly1305_vec.c
index 3235b58..4a826cc 100644
--- a/crypto/poly1305/poly1305_vec.c
+++ b/crypto/poly1305/poly1305_vec.c
@@ -20,12 +20,13 @@
 
 #include <openssl/poly1305.h>
 
+#include "../internal.h"
+
 
 #if !defined(OPENSSL_WINDOWS) && defined(OPENSSL_X86_64)
 
 #include <emmintrin.h>
 
-#define ALIGN(x) __attribute__((aligned(x)))
 /* inline is not a keyword in C89. */
 #define INLINE 
 #define U8TO64_LE(m) (*(const uint64_t *)(m))
@@ -35,11 +36,11 @@
 typedef __m128i xmmi;
 typedef unsigned __int128 uint128_t;
 
-static const uint32_t ALIGN(16) poly1305_x64_sse2_message_mask[4] = {
+static const alignas(16) uint32_t poly1305_x64_sse2_message_mask[4] = {
     (1 << 26) - 1, 0, (1 << 26) - 1, 0};
-static const uint32_t ALIGN(16) poly1305_x64_sse2_5[4] = {5, 0, 5, 0};
-static const uint32_t ALIGN(16) poly1305_x64_sse2_1shl128[4] = {(1 << 24), 0,
-                                                                (1 << 24), 0};
+static const alignas(16) uint32_t poly1305_x64_sse2_5[4] = {5, 0, 5, 0};
+static const alignas(16) uint32_t poly1305_x64_sse2_1shl128[4] = {
+    (1 << 24), 0, (1 << 24), 0};
 
 static uint128_t INLINE add128(uint128_t a, uint128_t b) { return a + b; }