Trim 88 bytes from each AES-GCM EVP_AEAD.

EVP_AEAD reused portions of EVP_CIPHER's GCM128_CONTEXT which contains both the
key and intermediate state for each operation. (The legacy OpenSSL EVP_CIPHER
API has no way to store just a key.) Split out a GCM128_KEY and store that
instead.

Change-Id: Ibc550084fa82963d3860346ed26f9cf170dceda5
Reviewed-on: https://boringssl-review.googlesource.com/32004
Commit-Queue: David Benjamin <davidben@google.com>
Commit-Queue: Adam Langley <agl@google.com>
Reviewed-by: Adam Langley <agl@google.com>
CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
diff --git a/crypto/fipsmodule/cipher/e_aes.c b/crypto/fipsmodule/cipher/e_aes.c
index de2f10f..734a517 100644
--- a/crypto/fipsmodule/cipher/e_aes.c
+++ b/crypto/fipsmodule/cipher/e_aes.c
@@ -305,13 +305,13 @@
   return 1;
 }
 
-ctr128_f aes_ctr_set_key(AES_KEY *aes_key, GCM128_CONTEXT *gcm_ctx,
+ctr128_f aes_ctr_set_key(AES_KEY *aes_key, GCM128_KEY *gcm_key,
                          block128_f *out_block, const uint8_t *key,
                          size_t key_bytes) {
   if (hwaes_capable()) {
     aes_hw_set_encrypt_key(key, key_bytes * 8, aes_key);
-    if (gcm_ctx != NULL) {
-      CRYPTO_gcm128_init(gcm_ctx, aes_key, (block128_f)aes_hw_encrypt, 1);
+    if (gcm_key != NULL) {
+      CRYPTO_gcm128_init_key(gcm_key, aes_key, (block128_f)aes_hw_encrypt, 1);
     }
     if (out_block) {
       *out_block = (block128_f) aes_hw_encrypt;
@@ -321,8 +321,8 @@
 
   if (bsaes_capable()) {
     AES_set_encrypt_key(key, key_bytes * 8, aes_key);
-    if (gcm_ctx != NULL) {
-      CRYPTO_gcm128_init(gcm_ctx, aes_key, (block128_f)AES_encrypt, 0);
+    if (gcm_key != NULL) {
+      CRYPTO_gcm128_init_key(gcm_key, aes_key, (block128_f)AES_encrypt, 0);
     }
     if (out_block) {
       *out_block = (block128_f) AES_encrypt;
@@ -335,15 +335,15 @@
     if (out_block) {
       *out_block = (block128_f) vpaes_encrypt;
     }
-    if (gcm_ctx != NULL) {
-      CRYPTO_gcm128_init(gcm_ctx, aes_key, (block128_f)vpaes_encrypt, 0);
+    if (gcm_key != NULL) {
+      CRYPTO_gcm128_init_key(gcm_key, aes_key, (block128_f)vpaes_encrypt, 0);
     }
     return NULL;
   }
 
   AES_set_encrypt_key(key, key_bytes * 8, aes_key);
-  if (gcm_ctx != NULL) {
-    CRYPTO_gcm128_init(gcm_ctx, aes_key, (block128_f)AES_encrypt, 0);
+  if (gcm_key != NULL) {
+    CRYPTO_gcm128_init_key(gcm_key, aes_key, (block128_f)AES_encrypt, 0);
   }
   if (out_block) {
     *out_block = (block128_f) AES_encrypt;
@@ -358,8 +358,9 @@
     return 1;
   }
   if (key) {
-    gctx->ctr =
-        aes_ctr_set_key(&gctx->ks.ks, &gctx->gcm, NULL, key, ctx->key_len);
+    OPENSSL_memset(&gctx->gcm, 0, sizeof(gctx->gcm));
+    gctx->ctr = aes_ctr_set_key(&gctx->ks.ks, &gctx->gcm.gcm_key, NULL, key,
+                                ctx->key_len);
     // If we have an iv can set it directly, otherwise use saved IV.
     if (iv == NULL && gctx->iv_set) {
       iv = gctx->iv;
@@ -879,7 +880,7 @@
     double align;
     AES_KEY ks;
   } ks;
-  GCM128_CONTEXT gcm;
+  GCM128_KEY gcm_key;
   ctr128_f ctr;
 };
 
@@ -903,7 +904,7 @@
   }
 
   gcm_ctx->ctr =
-      aes_ctr_set_key(&gcm_ctx->ks.ks, &gcm_ctx->gcm, NULL, key, key_len);
+      aes_ctr_set_key(&gcm_ctx->ks.ks, &gcm_ctx->gcm_key, NULL, key, key_len);
   *out_tag_len = tag_len;
   return 1;
 }
@@ -941,7 +942,6 @@
                                      size_t extra_in_len,
                                      const uint8_t *ad, size_t ad_len) {
   const struct aead_aes_gcm_ctx *gcm_ctx = ctx->aead_state;
-  GCM128_CONTEXT gcm;
 
   if (extra_in_len + ctx->tag_len < ctx->tag_len) {
     OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_TOO_LARGE);
@@ -958,7 +958,9 @@
 
   const AES_KEY *key = &gcm_ctx->ks.ks;
 
-  OPENSSL_memcpy(&gcm, &gcm_ctx->gcm, sizeof(gcm));
+  GCM128_CONTEXT gcm;
+  OPENSSL_memset(&gcm, 0, sizeof(gcm));
+  OPENSSL_memcpy(&gcm.gcm_key, &gcm_ctx->gcm_key, sizeof(gcm.gcm_key));
   CRYPTO_gcm128_setiv(&gcm, key, nonce, nonce_len);
 
   if (ad_len > 0 && !CRYPTO_gcm128_aad(&gcm, ad, ad_len)) {
@@ -1002,7 +1004,6 @@
                                     const uint8_t *ad, size_t ad_len) {
   const struct aead_aes_gcm_ctx *gcm_ctx = ctx->aead_state;
   uint8_t tag[EVP_AEAD_AES_GCM_TAG_LEN];
-  GCM128_CONTEXT gcm;
 
   if (nonce_len == 0) {
     OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_INVALID_NONCE_SIZE);
@@ -1016,7 +1017,9 @@
 
   const AES_KEY *key = &gcm_ctx->ks.ks;
 
-  OPENSSL_memcpy(&gcm, &gcm_ctx->gcm, sizeof(gcm));
+  GCM128_CONTEXT gcm;
+  OPENSSL_memset(&gcm, 0, sizeof(gcm));
+  OPENSSL_memcpy(&gcm.gcm_key, &gcm_ctx->gcm_key, sizeof(gcm.gcm_key));
   CRYPTO_gcm128_setiv(&gcm, key, nonce, nonce_len);
 
   if (!CRYPTO_gcm128_aad(&gcm, ad, ad_len)) {
diff --git a/crypto/fipsmodule/cipher/internal.h b/crypto/fipsmodule/cipher/internal.h
index 7b5f23f..7c739fb 100644
--- a/crypto/fipsmodule/cipher/internal.h
+++ b/crypto/fipsmodule/cipher/internal.h
@@ -114,11 +114,11 @@
 
 // aes_ctr_set_key initialises |*aes_key| using |key_bytes| bytes from |key|,
 // where |key_bytes| must either be 16, 24 or 32. If not NULL, |*out_block| is
-// set to a function that encrypts single blocks. If not NULL, |*gcm_ctx| is
+// set to a function that encrypts single blocks. If not NULL, |*gcm_key| is
 // initialised to do GHASH with the given key. It returns a function for
 // optimised CTR-mode, or NULL if CTR-mode should be built using
 // |*out_block|.
-ctr128_f aes_ctr_set_key(AES_KEY *aes_key, GCM128_CONTEXT *gcm_ctx,
+ctr128_f aes_ctr_set_key(AES_KEY *aes_key, GCM128_KEY *gcm_key,
                          block128_f *out_block, const uint8_t *key,
                          size_t key_bytes);
 
diff --git a/crypto/fipsmodule/modes/gcm.c b/crypto/fipsmodule/modes/gcm.c
index 99d0e15..6eff479 100644
--- a/crypto/fipsmodule/modes/gcm.c
+++ b/crypto/fipsmodule/modes/gcm.c
@@ -243,9 +243,10 @@
                     size_t len);
 #endif
 
-#define GCM_MUL(ctx, Xi) gcm_gmult_4bit((ctx)->Xi.u, (ctx)->Htable)
+#define GCM_MUL(ctx, Xi) gcm_gmult_4bit((ctx)->Xi.u, (ctx)->gcm_key.Htable)
 #if defined(GHASH_ASM)
-#define GHASH(ctx, in, len) gcm_ghash_4bit((ctx)->Xi.u, (ctx)->Htable, in, len)
+#define GHASH(ctx, in, len) \
+  gcm_ghash_4bit((ctx)->Xi.u, (ctx)->gcm_key.Htable, in, len)
 // GHASH_CHUNK is "stride parameter" missioned to mitigate cache
 // trashing effect. In other words idea is to hash data while it's
 // still in L1 cache after encryption pass...
@@ -337,10 +338,11 @@
 
 #ifdef GCM_FUNCREF_4BIT
 #undef GCM_MUL
-#define GCM_MUL(ctx, Xi) (*gcm_gmult_p)((ctx)->Xi.u, (ctx)->Htable)
+#define GCM_MUL(ctx, Xi) (*gcm_gmult_p)((ctx)->Xi.u, (ctx)->gcm_key.Htable)
 #ifdef GHASH
 #undef GHASH
-#define GHASH(ctx, in, len) (*gcm_ghash_p)((ctx)->Xi.u, (ctx)->Htable, in, len)
+#define GHASH(ctx, in, len) \
+  (*gcm_ghash_p)((ctx)->Xi.u, (ctx)->gcm_key.Htable, in, len)
 #endif
 #endif
 
@@ -417,27 +419,28 @@
 #endif
 }
 
-void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, const void *aes_key,
-                        block128_f block, int block_is_hwaes) {
-  OPENSSL_memset(ctx, 0, sizeof(*ctx));
-  ctx->block = block;
+void CRYPTO_gcm128_init_key(GCM128_KEY *gcm_key, const void *aes_key,
+                            block128_f block, int block_is_hwaes) {
+  OPENSSL_memset(gcm_key, 0, sizeof(*gcm_key));
+  gcm_key->block = block;
 
-  uint8_t gcm_key[16];
-  OPENSSL_memset(gcm_key, 0, sizeof(gcm_key));
-  (*block)(gcm_key, gcm_key, aes_key);
+  uint8_t ghash_key[16];
+  OPENSSL_memset(ghash_key, 0, sizeof(ghash_key));
+  (*block)(ghash_key, ghash_key, aes_key);
 
   int is_avx;
-  CRYPTO_ghash_init(&ctx->gmult, &ctx->ghash, &ctx->H, ctx->Htable, &is_avx,
-                    gcm_key);
+  CRYPTO_ghash_init(&gcm_key->gmult, &gcm_key->ghash, &gcm_key->H,
+                    gcm_key->Htable, &is_avx, ghash_key);
 
-  ctx->use_aesni_gcm_crypt = (is_avx && block_is_hwaes) ? 1 : 0;
+  gcm_key->use_aesni_gcm_crypt = (is_avx && block_is_hwaes) ? 1 : 0;
 }
 
 void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const void *key,
                          const uint8_t *iv, size_t len) {
   unsigned int ctr;
 #ifdef GCM_FUNCREF_4BIT
-  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
+  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) =
+      ctx->gcm_key.gmult;
 #endif
 
   ctx->Yi.u[0] = 0;
@@ -477,7 +480,7 @@
     ctr = CRYPTO_bswap4(ctx->Yi.d[3]);
   }
 
-  (*ctx->block)(ctx->Yi.c, ctx->EK0.c, key);
+  (*ctx->gcm_key.block)(ctx->Yi.c, ctx->EK0.c, key);
   ++ctr;
   ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
 }
@@ -486,10 +489,11 @@
   unsigned int n;
   uint64_t alen = ctx->len.u[0];
 #ifdef GCM_FUNCREF_4BIT
-  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
+  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) =
+      ctx->gcm_key.gmult;
 #ifdef GHASH
   void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
-                      size_t len) = ctx->ghash;
+                      size_t len) = ctx->gcm_key.ghash;
 #endif
 #endif
 
@@ -553,12 +557,13 @@
                           const uint8_t *in, uint8_t *out, size_t len) {
   unsigned int n, ctr;
   uint64_t mlen = ctx->len.u[1];
-  block128_f block = ctx->block;
+  block128_f block = ctx->gcm_key.block;
 #ifdef GCM_FUNCREF_4BIT
-  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
+  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) =
+      ctx->gcm_key.gmult;
 #ifdef GHASH
   void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
-                      size_t len) = ctx->ghash;
+                      size_t len) = ctx->gcm_key.ghash;
 #endif
 #endif
 
@@ -679,12 +684,13 @@
                           size_t len) {
   unsigned int n, ctr;
   uint64_t mlen = ctx->len.u[1];
-  block128_f block = ctx->block;
+  block128_f block = ctx->gcm_key.block;
 #ifdef GCM_FUNCREF_4BIT
-  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
+  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) =
+      ctx->gcm_key.gmult;
 #ifdef GHASH
   void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
-                      size_t len) = ctx->ghash;
+                      size_t len) = ctx->gcm_key.ghash;
 #endif
 #endif
 
@@ -813,10 +819,11 @@
   unsigned int n, ctr;
   uint64_t mlen = ctx->len.u[1];
 #ifdef GCM_FUNCREF_4BIT
-  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
+  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) =
+      ctx->gcm_key.gmult;
 #ifdef GHASH
   void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
-                      size_t len) = ctx->ghash;
+                      size_t len) = ctx->gcm_key.ghash;
 #endif
 #endif
 
@@ -849,7 +856,7 @@
   }
 
 #if defined(AESNI_GCM)
-  if (ctx->use_aesni_gcm_crypt) {
+  if (ctx->gcm_key.use_aesni_gcm_crypt) {
     // |aesni_gcm_encrypt| may not process all the input given to it. It may
     // not process *any* of its input if it is deemed too small.
     size_t bulk = aesni_gcm_encrypt(in, out, len, key, ctx->Yi.c, ctx->Xi.u);
@@ -895,7 +902,7 @@
 #endif
   }
   if (len) {
-    (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
+    (*ctx->gcm_key.block)(ctx->Yi.c, ctx->EKi.c, key);
     ++ctr;
     ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
     while (len--) {
@@ -914,10 +921,11 @@
   unsigned int n, ctr;
   uint64_t mlen = ctx->len.u[1];
 #ifdef GCM_FUNCREF_4BIT
-  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
+  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) =
+      ctx->gcm_key.gmult;
 #ifdef GHASH
   void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
-                      size_t len) = ctx->ghash;
+                      size_t len) = ctx->gcm_key.ghash;
 #endif
 #endif
 
@@ -952,7 +960,7 @@
   }
 
 #if defined(AESNI_GCM)
-  if (ctx->use_aesni_gcm_crypt) {
+  if (ctx->gcm_key.use_aesni_gcm_crypt) {
     // |aesni_gcm_decrypt| may not process all the input given to it. It may
     // not process *any* of its input if it is deemed too small.
     size_t bulk = aesni_gcm_decrypt(in, out, len, key, ctx->Yi.c, ctx->Xi.u);
@@ -1001,7 +1009,7 @@
     len -= i;
   }
   if (len) {
-    (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
+    (*ctx->gcm_key.block)(ctx->Yi.c, ctx->EKi.c, key);
     ++ctr;
     ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
     while (len--) {
@@ -1020,7 +1028,8 @@
   uint64_t alen = ctx->len.u[0] << 3;
   uint64_t clen = ctx->len.u[1] << 3;
 #ifdef GCM_FUNCREF_4BIT
-  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
+  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) =
+      ctx->gcm_key.gmult;
 #endif
 
   if (ctx->mres || ctx->ares) {
diff --git a/crypto/fipsmodule/modes/gcm_test.cc b/crypto/fipsmodule/modes/gcm_test.cc
index 5988945..ab8cc34 100644
--- a/crypto/fipsmodule/modes/gcm_test.cc
+++ b/crypto/fipsmodule/modes/gcm_test.cc
@@ -87,7 +87,8 @@
     ASSERT_EQ(0, AES_set_encrypt_key(key.data(), key.size() * 8, &aes_key));
 
     GCM128_CONTEXT ctx;
-    CRYPTO_gcm128_init(&ctx, &aes_key, (block128_f)AES_encrypt, 0);
+    OPENSSL_memset(&ctx, 0, sizeof(ctx));
+    CRYPTO_gcm128_init_key(&ctx.gcm_key, &aes_key, (block128_f)AES_encrypt, 0);
     CRYPTO_gcm128_setiv(&ctx, &aes_key, nonce.data(), nonce.size());
     if (!additional_data.empty()) {
       CRYPTO_gcm128_aad(&ctx, additional_data.data(), additional_data.size());
diff --git a/crypto/fipsmodule/modes/internal.h b/crypto/fipsmodule/modes/internal.h
index 338bf13..962c2ce 100644
--- a/crypto/fipsmodule/modes/internal.h
+++ b/crypto/fipsmodule/modes/internal.h
@@ -104,11 +104,30 @@
 typedef void (*ghash_func)(uint64_t Xi[2], const u128 Htable[16],
                            const uint8_t *inp, size_t len);
 
+typedef struct {
+  // Note the MOVBE-based, x86-64, GHASH assembly requires |H| and |Htable| to
+  // be the first two elements of this struct.
+  u128 H;
+  u128 Htable[16];
+  gmult_func gmult;
+  ghash_func ghash;
+
+  block128_f block;
+
+  // use_aesni_gcm_crypt is true if this context should use the assembly
+  // functions |aesni_gcm_encrypt| and |aesni_gcm_decrypt| to process data.
+  unsigned use_aesni_gcm_crypt:1;
+} GCM128_KEY;
+
+// gcm128_context, or |GCM128_CONTEXT| contains state for a single GCM
+// operation. The structure should be zero-initialized before use.
+//
 // This differs from upstream's |gcm128_context| in that it does not have the
 // |key| pointer, in order to make it |memcpy|-friendly. Rather the key is
-// passed into each call that needs it.
+// passed into each call that needs it. Additionally, |gcm_key| is split into a
+// separate struct.
 struct gcm128_context {
-  // Following 6 names follow names in GCM specification
+  // The following 5 names follow names in GCM specification
   union {
     uint64_t u[2];
     uint32_t d[4];
@@ -116,19 +135,11 @@
     size_t t[16 / sizeof(size_t)];
   } Yi, EKi, EK0, len, Xi;
 
-  // Note that the order of |Xi|, |H| and |Htable| is fixed by the MOVBE-based,
+  // Note that the order of |Xi| and |gcm_key| is fixed by the MOVBE-based,
   // x86-64, GHASH assembly.
-  u128 H;
-  u128 Htable[16];
-  gmult_func gmult;
-  ghash_func ghash;
+  GCM128_KEY gcm_key;
 
-  unsigned int mres, ares;
-  block128_f block;
-
-  // use_aesni_gcm_crypt is true if this context should use the assembly
-  // functions |aesni_gcm_encrypt| and |aesni_gcm_decrypt| to process data.
-  unsigned use_aesni_gcm_crypt:1;
+  unsigned mres, ares;
 };
 
 #if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
@@ -188,10 +199,11 @@
                        u128 *out_key, u128 out_table[16], int *out_is_avx,
                        const uint8_t *gcm_key);
 
-// CRYPTO_gcm128_init initialises |ctx| to use |block| (typically AES) with
-// the given key. |block_is_hwaes| is one if |block| is |aes_hw_encrypt|.
-OPENSSL_EXPORT void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, const void *key,
-                                       block128_f block, int block_is_hwaes);
+// CRYPTO_gcm128_init_key initialises |gcm_key| to use |block| (typically AES)
+// with the given key. |block_is_hwaes| is one if |block| is |aes_hw_encrypt|.
+OPENSSL_EXPORT void CRYPTO_gcm128_init_key(GCM128_KEY *gcm_key, const void *key,
+                                           block128_f block,
+                                           int block_is_hwaes);
 
 // CRYPTO_gcm128_setiv sets the IV (nonce) for |ctx|. The |key| must be the
 // same key that was passed to |CRYPTO_gcm128_init|.