Don't make assumptions about GCM128_CONTEXT layout in aesv8-gcm-armv8.pl

The kernels loaded the Htable entries at fixed offsets from Xi. That
assumption isn't captured by the comments, has technically caused the
ABI tests to go out of bounds, and is entirely unnecessary: the kernels
can just take Htable as a parameter.

Change-Id: Iad748d5b649333985ebaa1f84031fbe9a2339a85
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/59505
Auto-Submit: David Benjamin <davidben@google.com>
Reviewed-by: Adam Langley <agl@google.com>
Commit-Queue: Adam Langley <agl@google.com>
diff --git a/crypto/fipsmodule/modes/asm/aesv8-gcm-armv8.pl b/crypto/fipsmodule/modes/asm/aesv8-gcm-armv8.pl
index bf86174..08ea58d 100644
--- a/crypto/fipsmodule/modes/asm/aesv8-gcm-armv8.pl
+++ b/crypto/fipsmodule/modes/asm/aesv8-gcm-armv8.pl
@@ -206,6 +206,7 @@
 $bit_length="x1";
 $output_ptr="x2";
 $current_tag="x3";
+$Htable="x6";
 $counter="x16";
 $cc="x8";
 
@@ -281,7 +282,8 @@
 #                           uint8_t *out,
 #                           u64 *Xi,
 #                           uint8_t ivec[16],
-#                           const void *key);
+#                           const void *key,
+#                           const void *Htable);
 #
 $code.=<<___;
 .global aes_gcm_enc_kernel
@@ -343,18 +345,18 @@
 	aese    $ctr2b, $rk0  \n  aesmc   $ctr2b, $ctr2b          // AES block 2 - round 0
 	ldr     $rk5q, [$cc, #80]                                 // load rk5
 	aese    $ctr1b, $rk1  \n  aesmc   $ctr1b, $ctr1b          // AES block 1 - round 1
-	ldr     $h3q, [$current_tag, #80]                         // load h3l | h3h
+	ldr     $h3q, [$Htable, #48]                              // load h3l | h3h
 	ext     $h3b, $h3b, $h3b, #8
 	aese    $ctr3b, $rk0  \n  aesmc   $ctr3b, $ctr3b          // AES block 3 - round 0
 	aese    $ctr2b, $rk1  \n  aesmc   $ctr2b, $ctr2b          // AES block 2 - round 1
 	ldr     $rk4q, [$cc, #64]                                 // load rk4
 	aese    $ctr1b, $rk2  \n  aesmc   $ctr1b, $ctr1b          // AES block 1 - round 2
-	ldr     $h2q, [$current_tag, #64]                         // load h2l | h2h
+	ldr     $h2q, [$Htable, #32]                              // load h2l | h2h
 	ext     $h2b, $h2b, $h2b, #8
 	aese    $ctr3b, $rk1  \n  aesmc   $ctr3b, $ctr3b          // AES block 3 - round 1
 	ldr     $rk12q, [$cc, #192]                               // load rk12
 	aese    $ctr2b, $rk2  \n  aesmc   $ctr2b, $ctr2b          // AES block 2 - round 2
-	ldr     $h4q, [$current_tag, #112]                        // load h4l | h4h
+	ldr     $h4q, [$Htable, #80]                              // load h4l | h4h
 	ext     $h4b, $h4b, $h4b, #8
 	aese    $ctr1b, $rk3  \n  aesmc   $ctr1b, $ctr1b          // AES block 1 - round 3
 	ldr     $rk11q, [$cc, #176]                               // load rk11
@@ -381,7 +383,7 @@
 	aese    $ctr3b, $rk6  \n  aesmc   $ctr3b, $ctr3b          // AES block 3 - round 6
 	ldr     $rk9q, [$cc, #144]                                // load rk9
 	aese    $ctr0b, $rk6  \n  aesmc   $ctr0b, $ctr0b          // AES block 0 - round 6
-	ldr     $h1q, [$current_tag, #32]                         // load h1l | h1h
+	ldr     $h1q, [$Htable]                                   // load h1l | h1h
 	ext     $h1b, $h1b, $h1b, #8
 	aese    $ctr2b, $rk6  \n  aesmc   $ctr2b, $ctr2b          // AES block 2 - round 6
 	ldr     $rk10q, [$cc, #160]                               // load rk10
@@ -967,13 +969,13 @@
 	ldr     $rk4q, [$cc, #64]                                 // load rk4
 	ldr     $rk1q, [$cc, #16]                                 // load rk1
 	aese    $ctr0b, $rk0  \n  aesmc   $ctr0b, $ctr0b          // AES block 0 - round 0
-	ldr     $h3q, [$current_tag, #80]                         // load h3l | h3h
+	ldr     $h3q, [$Htable, #48]                              // load h3l | h3h
 	ext     $h3b, $h3b, $h3b, #8
 	aese    $ctr3b, $rk0  \n  aesmc   $ctr3b, $ctr3b          // AES block 3 - round 0
-	ldr     $h4q, [$current_tag, #112]                        // load h4l | h4h
+	ldr     $h4q, [$Htable, #80]                              // load h4l | h4h
 	ext     $h4b, $h4b, $h4b, #8
 	aese    $ctr1b, $rk0  \n  aesmc   $ctr1b, $ctr1b          // AES block 1 - round 0
-	ldr     $h2q, [$current_tag, #64]                         // load h2l | h2h
+	ldr     $h2q, [$Htable, #32]                              // load h2l | h2h
 	ext     $h2b, $h2b, $h2b, #8
 	aese    $ctr2b, $rk0  \n  aesmc   $ctr2b, $ctr2b          // AES block 2 - round 0
 	ldr     $rk2q, [$cc, #32]                                 // load rk2
@@ -987,7 +989,7 @@
 	aese    $ctr3b, $rk1  \n  aesmc   $ctr3b, $ctr3b          // AES block 3 - round 1
 	ldr     $rk12q, [$cc, #192]                               // load rk12
 	aese    $ctr0b, $rk2  \n  aesmc   $ctr0b, $ctr0b          // AES block 0 - round 2
-	ldr     $h1q, [$current_tag, #32]                         // load h1l | h1h
+	ldr     $h1q, [$Htable]                                   // load h1l | h1h
 	ext     $h1b, $h1b, $h1b, #8
 	aese    $ctr2b, $rk2  \n  aesmc   $ctr2b, $ctr2b          // AES block 2 - round 2
 	ldr     $rk10q, [$cc, #160]                               // load rk10
diff --git a/crypto/fipsmodule/modes/gcm.c b/crypto/fipsmodule/modes/gcm.c
index f22fa9d..33374d0 100644
--- a/crypto/fipsmodule/modes/gcm.c
+++ b/crypto/fipsmodule/modes/gcm.c
@@ -134,14 +134,18 @@
 
 #if defined(HW_GCM) && defined(OPENSSL_X86_64)
 static size_t hw_gcm_encrypt(const uint8_t *in, uint8_t *out, size_t len,
-                             const AES_KEY *key, uint8_t ivec[16],
-                             uint64_t *Xi) {
+                             const AES_KEY *key, uint8_t ivec[16], uint64_t *Xi,
+                             const u128 Htable[16]) {
+  // TODO(davidben): |aesni_gcm_encrypt| accesses |Htable| but does so assuming
+  // it is a known offset from |Xi|.
   return aesni_gcm_encrypt(in, out, len, key, ivec, Xi);
 }
 
 static size_t hw_gcm_decrypt(const uint8_t *in, uint8_t *out, size_t len,
-                             const AES_KEY *key, uint8_t ivec[16],
-                             uint64_t *Xi) {
+                             const AES_KEY *key, uint8_t ivec[16], uint64_t *Xi,
+                             const u128 Htable[16]) {
+  // TODO(davidben): |aesni_gcm_decrypt| accesses |Htable| but does so assuming
+  // it is a known offset from |Xi|.
   return aesni_gcm_decrypt(in, out, len, key, ivec, Xi);
 }
 #endif  // HW_GCM && X86_64
@@ -149,24 +153,24 @@
 #if defined(HW_GCM) && defined(OPENSSL_AARCH64)
 
 static size_t hw_gcm_encrypt(const uint8_t *in, uint8_t *out, size_t len,
-                             const AES_KEY *key, uint8_t ivec[16],
-                             uint64_t *Xi) {
+                             const AES_KEY *key, uint8_t ivec[16], uint64_t *Xi,
+                             const u128 Htable[16]) {
   const size_t len_blocks = len & kSizeTWithoutLower4Bits;
   if (!len_blocks) {
     return 0;
   }
-  aes_gcm_enc_kernel(in, len_blocks * 8, out, Xi, ivec, key);
+  aes_gcm_enc_kernel(in, len_blocks * 8, out, Xi, ivec, key, Htable);
   return len_blocks;
 }
 
 static size_t hw_gcm_decrypt(const uint8_t *in, uint8_t *out, size_t len,
-                             const AES_KEY *key, uint8_t ivec[16],
-                             uint64_t *Xi) {
+                             const AES_KEY *key, uint8_t ivec[16], uint64_t *Xi,
+                             const u128 Htable[16]) {
   const size_t len_blocks = len & kSizeTWithoutLower4Bits;
   if (!len_blocks) {
     return 0;
   }
-  aes_gcm_dec_kernel(in, len_blocks * 8, out, Xi, ivec, key);
+  aes_gcm_dec_kernel(in, len_blocks * 8, out, Xi, ivec, key, Htable);
   return len_blocks;
 }
 
@@ -587,7 +591,8 @@
   if (ctx->gcm_key.use_hw_gcm_crypt && len > 0) {
     // |hw_gcm_encrypt| may not process all the input given to it. It may
     // not process *any* of its input if it is deemed too small.
-    size_t bulk = hw_gcm_encrypt(in, out, len, key, ctx->Yi.c, ctx->Xi.u);
+    size_t bulk = hw_gcm_encrypt(in, out, len, key, ctx->Yi.c, ctx->Xi.u,
+                                 ctx->gcm_key.Htable);
     in += bulk;
     out += bulk;
     len -= bulk;
@@ -675,7 +680,8 @@
   if (ctx->gcm_key.use_hw_gcm_crypt && len > 0) {
     // |hw_gcm_decrypt| may not process all the input given to it. It may
     // not process *any* of its input if it is deemed too small.
-    size_t bulk = hw_gcm_decrypt(in, out, len, key, ctx->Yi.c, ctx->Xi.u);
+    size_t bulk = hw_gcm_decrypt(in, out, len, key, ctx->Yi.c, ctx->Xi.u,
+                                 ctx->gcm_key.Htable);
     in += bulk;
     out += bulk;
     len -= bulk;
diff --git a/crypto/fipsmodule/modes/gcm_test.cc b/crypto/fipsmodule/modes/gcm_test.cc
index e7fcd5c..6ed3e72 100644
--- a/crypto/fipsmodule/modes/gcm_test.cc
+++ b/crypto/fipsmodule/modes/gcm_test.cc
@@ -217,8 +217,10 @@
     for (size_t key_bits = 128; key_bits <= 256; key_bits += 64) {
       AES_KEY aes_key;
       aes_hw_set_encrypt_key(kKey, key_bits, &aes_key);
-      CHECK_ABI(aes_gcm_enc_kernel, buf, sizeof(buf) * 8, buf, X, iv, &aes_key);
-      CHECK_ABI(aes_gcm_dec_kernel, buf, sizeof(buf) * 8, buf, X, iv, &aes_key);
+      CHECK_ABI(aes_gcm_enc_kernel, buf, sizeof(buf) * 8, buf, X, iv, &aes_key,
+                Htable);
+      CHECK_ABI(aes_gcm_dec_kernel, buf, sizeof(buf) * 8, buf, X, iv, &aes_key,
+                Htable);
     }
   }
 #endif
diff --git a/crypto/fipsmodule/modes/internal.h b/crypto/fipsmodule/modes/internal.h
index 5a14210..0daad55 100644
--- a/crypto/fipsmodule/modes/internal.h
+++ b/crypto/fipsmodule/modes/internal.h
@@ -303,9 +303,11 @@
 #define HW_GCM
 // These functions are defined in aesv8-gcm-armv8.pl.
 void aes_gcm_enc_kernel(const uint8_t *in, uint64_t in_bits, void *out,
-                        void *Xi, uint8_t *ivec, const AES_KEY *key);
+                        void *Xi, uint8_t *ivec, const AES_KEY *key,
+                        const u128 Htable[16]);
 void aes_gcm_dec_kernel(const uint8_t *in, uint64_t in_bits, void *out,
-                        void *Xi, uint8_t *ivec, const AES_KEY *key);
+                        void *Xi, uint8_t *ivec, const AES_KEY *key,
+                        const u128 Htable[16]);
 #endif
 
 #endif