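Add ABI tests for GCM.

Annotate the x86_64 GHASH assembly with CFI directives and move the
GHASH assembly declarations from gcm.c to modes/internal.h so that
gcm_test.cc can call the functions through CHECK_ABI. For the same
reason gcm_init_4bit, gcm_gmult_4bit and gcm_ghash_4bit are no longer
static.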

Change-Id: If28096e677104c6109e31e31a636fee82ef4ba11
Reviewed-on: https://boringssl-review.googlesource.com/c/34266
Commit-Queue: David Benjamin <davidben@google.com>
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/fipsmodule/modes/asm/ghash-x86_64.pl b/crypto/fipsmodule/modes/asm/ghash-x86_64.pl
index fc631c7..b267698 100644
--- a/crypto/fipsmodule/modes/asm/ghash-x86_64.pl
+++ b/crypto/fipsmodule/modes/asm/ghash-x86_64.pl
@@ -518,6 +518,7 @@
 .type	gcm_init_clmul,\@abi-omnipotent
 .align	16
 gcm_init_clmul:
+.cfi_startproc
 .L_init_clmul:
 ___
 $code.=<<___ if ($win64);
@@ -587,6 +588,7 @@
 ___
 $code.=<<___;
 	ret
+.cfi_endproc
 .size	gcm_init_clmul,.-gcm_init_clmul
 ___
 }
@@ -598,6 +600,7 @@
 .type	gcm_gmult_clmul,\@abi-omnipotent
 .align	16
 gcm_gmult_clmul:
+.cfi_startproc
 .L_gmult_clmul:
 	movdqu		($Xip),$Xi
 	movdqa		.Lbswap_mask(%rip),$T3
@@ -634,6 +637,7 @@
 	pshufb		$T3,$Xi
 	movdqu		$Xi,($Xip)
 	ret
+.cfi_endproc
 .size	gcm_gmult_clmul,.-gcm_gmult_clmul
 ___
 }
@@ -647,6 +651,7 @@
 .type	gcm_ghash_clmul,\@abi-omnipotent
 .align	32
 gcm_ghash_clmul:
+.cfi_startproc
 .L_ghash_clmul:
 ___
 $code.=<<___ if ($win64);
@@ -995,6 +1000,7 @@
 ___
 $code.=<<___;
 	ret
+.cfi_endproc
 .size	gcm_ghash_clmul,.-gcm_ghash_clmul
 ___
 }
@@ -1004,6 +1010,7 @@
 .type	gcm_init_avx,\@abi-omnipotent
 .align	32
 gcm_init_avx:
+.cfi_startproc
 ___
 if ($avx) {
 my ($Htbl,$Xip)=@_4args;
@@ -1132,6 +1139,7 @@
 ___
 $code.=<<___;
 	ret
+.cfi_endproc
 .size	gcm_init_avx,.-gcm_init_avx
 ___
 } else {
@@ -1146,7 +1154,9 @@
 .type	gcm_gmult_avx,\@abi-omnipotent
 .align	32
 gcm_gmult_avx:
+.cfi_startproc
 	jmp	.L_gmult_clmul
+.cfi_endproc
 .size	gcm_gmult_avx,.-gcm_gmult_avx
 ___
 
@@ -1155,6 +1165,7 @@
 .type	gcm_ghash_avx,\@abi-omnipotent
 .align	32
 gcm_ghash_avx:
+.cfi_startproc
 ___
 if ($avx) {
 my ($Xip,$Htbl,$inp,$len)=@_4args;
@@ -1567,6 +1578,7 @@
 ___
 $code.=<<___;
 	ret
+.cfi_endproc
 .size	gcm_ghash_avx,.-gcm_ghash_avx
 ___
 } else {
diff --git a/crypto/fipsmodule/modes/gcm.c b/crypto/fipsmodule/modes/gcm.c
index 5e556df..2a450cd 100644
--- a/crypto/fipsmodule/modes/gcm.c
+++ b/crypto/fipsmodule/modes/gcm.c
@@ -57,12 +57,6 @@
 #include "internal.h"
 #include "../../internal.h"
 
-#if !defined(OPENSSL_NO_ASM) &&                         \
-    (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \
-     defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64) || \
-     defined(OPENSSL_PPC64LE))
-#define GHASH_ASM
-#endif
 
 #define PACK(s) ((size_t)(s) << (sizeof(size_t) * 8 - 16))
 #define REDUCE1BIT(V)                                                 \
@@ -82,7 +76,7 @@
 // bits of a |size_t|.
 static const size_t kSizeTWithoutLower4Bits = (size_t) -16;
 
-static void gcm_init_4bit(u128 Htable[16], uint64_t H[2]) {
+void gcm_init_4bit(u128 Htable[16], const uint64_t H[2]) {
   u128 V;
 
   Htable[0].hi = 0;
@@ -127,7 +121,7 @@
     PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
     PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)};
 
-static void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]) {
+void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]) {
   u128 Z;
   int cnt = 15;
   size_t rem, nlo, nhi;
@@ -182,8 +176,8 @@
 // performance improvement, at least not on x86[_64]. It's here
 // mostly as reference and a placeholder for possible future
 // non-trivial optimization[s]...
-static void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16],
-                           const uint8_t *inp, size_t len) {
+void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
+                    size_t len) {
   u128 Z;
   int cnt;
   size_t rem, nlo, nhi;
@@ -237,11 +231,7 @@
     Xi[1] = CRYPTO_bswap8(Z.lo);
   } while (inp += 16, len -= 16);
 }
-#else  // GHASH_ASM
-void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]);
-void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
-                    size_t len);
-#endif
+#endif   // !GHASH_ASM || AARCH64 || PPC64LE
 
 #define GCM_MUL(ctx, Xi) gcm_gmult_4bit((ctx)->Xi.u, (ctx)->gcm_key.Htable)
 #if defined(GHASH_ASM)
@@ -251,90 +241,7 @@
 // trashing effect. In other words idea is to hash data while it's
 // still in L1 cache after encryption pass...
 #define GHASH_CHUNK (3 * 1024)
-#endif
-
-
-#if defined(GHASH_ASM)
-
-#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
-#define GCM_FUNCREF_4BIT
-void gcm_init_clmul(u128 Htable[16], const uint64_t Xi[2]);
-void gcm_gmult_clmul(uint64_t Xi[2], const u128 Htable[16]);
-void gcm_ghash_clmul(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
-                     size_t len);
-
-#if defined(OPENSSL_X86_64)
-#define GHASH_ASM_X86_64
-void gcm_init_avx(u128 Htable[16], const uint64_t Xi[2]);
-void gcm_gmult_avx(uint64_t Xi[2], const u128 Htable[16]);
-void gcm_ghash_avx(uint64_t Xi[2], const u128 Htable[16], const uint8_t *in,
-                   size_t len);
-#define AESNI_GCM
-size_t aesni_gcm_encrypt(const uint8_t *in, uint8_t *out, size_t len,
-                         const AES_KEY *key, uint8_t ivec[16], uint64_t *Xi);
-size_t aesni_gcm_decrypt(const uint8_t *in, uint8_t *out, size_t len,
-                         const AES_KEY *key, uint8_t ivec[16], uint64_t *Xi);
-#endif
-
-#if defined(OPENSSL_X86)
-#define GHASH_ASM_X86
-void gcm_gmult_4bit_mmx(uint64_t Xi[2], const u128 Htable[16]);
-void gcm_ghash_4bit_mmx(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
-                        size_t len);
-#endif
-
-#elif defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)
-#include <openssl/arm_arch.h>
-#if __ARM_ARCH__ >= 7
-#define GHASH_ASM_ARM
-#define GCM_FUNCREF_4BIT
-
-static int pmull_capable(void) {
-  return CRYPTO_is_ARMv8_PMULL_capable();
-}
-
-void gcm_init_v8(u128 Htable[16], const uint64_t Xi[2]);
-void gcm_gmult_v8(uint64_t Xi[2], const u128 Htable[16]);
-void gcm_ghash_v8(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
-                  size_t len);
-
-#if defined(OPENSSL_ARM)
-// 32-bit ARM also has support for doing GCM with NEON instructions.
-static int neon_capable(void) {
-  return CRYPTO_is_NEON_capable();
-}
-
-void gcm_init_neon(u128 Htable[16], const uint64_t Xi[2]);
-void gcm_gmult_neon(uint64_t Xi[2], const u128 Htable[16]);
-void gcm_ghash_neon(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
-                    size_t len);
-#else
-// AArch64 only has the ARMv8 versions of functions.
-static int neon_capable(void) {
-  return 0;
-}
-static void gcm_init_neon(u128 Htable[16], const uint64_t Xi[2]) {
-  abort();
-}
-static void gcm_gmult_neon(uint64_t Xi[2], const u128 Htable[16]) {
-  abort();
-}
-static void gcm_ghash_neon(uint64_t Xi[2], const u128 Htable[16],
-                           const uint8_t *inp, size_t len) {
-  abort();
-}
-#endif
-
-#endif
-#elif defined(OPENSSL_PPC64LE)
-#define GHASH_ASM_PPC64LE
-#define GCM_FUNCREF_4BIT
-void gcm_init_p8(u128 Htable[16], const uint64_t Xi[2]);
-void gcm_gmult_p8(uint64_t Xi[2], const u128 Htable[16]);
-void gcm_ghash_p8(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
-                  size_t len);
-#endif
-#endif
+#endif  // GHASH_ASM
 
 #ifdef GCM_FUNCREF_4BIT
 #undef GCM_MUL
@@ -344,12 +251,11 @@
 #define GHASH(ctx, in, len) \
   (*gcm_ghash_p)((ctx)->Xi.u, (ctx)->gcm_key.Htable, in, len)
 #endif
-#endif
+#endif  // GCM_FUNCREF_4BIT
 
 void CRYPTO_ghash_init(gmult_func *out_mult, ghash_func *out_hash,
-                       u128 *out_key, u128 out_table[16],
-                       int *out_is_avx,
-                       const uint8_t *gcm_key) {
+                       u128 *out_key, u128 out_table[16], int *out_is_avx,
+                       const uint8_t gcm_key[16]) {
   *out_is_avx = 0;
 
   union {
@@ -387,14 +293,14 @@
     return;
   }
 #elif defined(GHASH_ASM_ARM)
-  if (pmull_capable()) {
+  if (gcm_pmull_capable()) {
     gcm_init_v8(out_table, H.u);
     *out_mult = gcm_gmult_v8;
     *out_hash = gcm_ghash_v8;
     return;
   }
 
-  if (neon_capable()) {
+  if (gcm_neon_capable()) {
     gcm_init_neon(out_table, H.u);
     *out_mult = gcm_gmult_neon;
     *out_hash = gcm_ghash_neon;
diff --git a/crypto/fipsmodule/modes/gcm_test.cc b/crypto/fipsmodule/modes/gcm_test.cc
index 16d9d09..fb17bbb 100644
--- a/crypto/fipsmodule/modes/gcm_test.cc
+++ b/crypto/fipsmodule/modes/gcm_test.cc
@@ -54,8 +54,10 @@
 #include <gtest/gtest.h>
 
 #include <openssl/aes.h>
+#include <openssl/cpu.h>
 
 #include "internal.h"
+#include "../../test/abi_test.h"
 #include "../../test/file_test.h"
 #include "../../test/test_util.h"
 
@@ -115,3 +117,43 @@
   EXPECT_EQ(UINT64_C(0x0807060504030201),
             CRYPTO_bswap8(UINT64_C(0x0102030405060708)));
 }
+
+#if defined(GHASH_ASM_X86_64) && defined(SUPPORTS_ABI_TEST)
+TEST(GCMTest, ABI) {  // ABI-checks the 4-bit, CLMUL and AVX GHASH code.
+  static const uint64_t kH[2] = {  // Input to each gcm_init_* call.
+      UINT64_C(0x66e94bd4ef8a2c3b),
+      UINT64_C(0x884cfa59ca342b2e),
+  };
+  static const size_t kBlockCounts[] = {1, 2, 3, 4, 7, 8, 15, 16, 31, 32};
+  uint8_t buf[16 * 32];  // 32 blocks: the largest |kBlockCounts| entry.
+  OPENSSL_memset(buf, 42, sizeof(buf));
+
+  uint64_t X[2] = {  // Passed as |Xi| to every gmult/ghash call.
+      UINT64_C(0x0388dace60b6a392),
+      UINT64_C(0xf328c2b971b2fe78),
+  };
+
+  u128 Htable[16];  // Re-initialised by each gcm_init_* call below.
+  CHECK_ABI(gcm_init_4bit, Htable, kH);
+  CHECK_ABI(gcm_gmult_4bit, X, Htable);
+  for (size_t blocks : kBlockCounts) {
+    CHECK_ABI(gcm_ghash_4bit, X, Htable, buf, 16 * blocks);
+  }
+
+  if (crypto_gcm_clmul_enabled()) {  // Gates the CLMUL and AVX calls.
+    CHECK_ABI(gcm_init_clmul, Htable, kH);
+    CHECK_ABI(gcm_gmult_clmul, X, Htable);
+    for (size_t blocks : kBlockCounts) {
+      CHECK_ABI(gcm_ghash_clmul, X, Htable, buf, 16 * blocks);
+    }
+
+    if (((OPENSSL_ia32cap_get()[1] >> 22) & 0x41) == 0x41) {  // AVX+MOVBE
+      CHECK_ABI(gcm_init_avx, Htable, kH);
+      CHECK_ABI(gcm_gmult_avx, X, Htable);
+      for (size_t blocks : kBlockCounts) {
+        CHECK_ABI(gcm_ghash_avx, X, Htable, buf, 16 * blocks);
+      }
+    }
+  }
+}
+#endif  // GHASH_ASM_X86_64 && SUPPORTS_ABI_TEST
diff --git a/crypto/fipsmodule/modes/internal.h b/crypto/fipsmodule/modes/internal.h
index 23aaca2..3163c50 100644
--- a/crypto/fipsmodule/modes/internal.h
+++ b/crypto/fipsmodule/modes/internal.h
@@ -50,8 +50,11 @@
 #define OPENSSL_HEADER_MODES_INTERNAL_H
 
 #include <openssl/base.h>
-#include <openssl/aes.h>
 
+#include <openssl/aes.h>
+#include <openssl/cpu.h>
+
+#include <stdlib.h>
 #include <string.h>
 
 #include "../../internal.h"
@@ -199,7 +202,7 @@
 // AVX implementation was used |*out_is_avx| will be true.
 void CRYPTO_ghash_init(gmult_func *out_mult, ghash_func *out_hash,
                        u128 *out_key, u128 out_table[16], int *out_is_avx,
-                       const uint8_t *gcm_key);
+                       const uint8_t gcm_key[16]);
 
 // CRYPTO_gcm128_init_key initialises |gcm_key| to use |block| (typically AES)
 // with the given key. |block_is_hwaes| is one if |block| is |aes_hw_encrypt|.
@@ -263,6 +266,99 @@
                                       size_t len);
 
 
+// GCM assembly.
+
+#if !defined(OPENSSL_NO_ASM) &&                         \
+    (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \
+     defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64) || \
+     defined(OPENSSL_PPC64LE))
+#define GHASH_ASM
+#endif
+
+void gcm_init_4bit(u128 Htable[16], const uint64_t H[2]);
+void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]);
+void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
+                    size_t len);
+
+#if defined(GHASH_ASM)
+
+#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
+#define GCM_FUNCREF_4BIT
+void gcm_init_clmul(u128 Htable[16], const uint64_t Xi[2]);
+void gcm_gmult_clmul(uint64_t Xi[2], const u128 Htable[16]);
+void gcm_ghash_clmul(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
+                     size_t len);
+
+#if defined(OPENSSL_X86_64)
+#define GHASH_ASM_X86_64
+void gcm_init_avx(u128 Htable[16], const uint64_t Xi[2]);
+void gcm_gmult_avx(uint64_t Xi[2], const u128 Htable[16]);
+void gcm_ghash_avx(uint64_t Xi[2], const u128 Htable[16], const uint8_t *in,
+                   size_t len);
+#define AESNI_GCM
+size_t aesni_gcm_encrypt(const uint8_t *in, uint8_t *out, size_t len,
+                         const AES_KEY *key, uint8_t ivec[16], uint64_t *Xi);
+size_t aesni_gcm_decrypt(const uint8_t *in, uint8_t *out, size_t len,
+                         const AES_KEY *key, uint8_t ivec[16], uint64_t *Xi);
+#endif  // OPENSSL_X86_64
+
+#if defined(OPENSSL_X86)
+#define GHASH_ASM_X86
+void gcm_gmult_4bit_mmx(uint64_t Xi[2], const u128 Htable[16]);
+void gcm_ghash_4bit_mmx(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
+                        size_t len);
+#endif  // OPENSSL_X86
+
+#elif defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)
+#include <openssl/arm_arch.h>
+#if __ARM_ARCH__ >= 7
+#define GHASH_ASM_ARM
+#define GCM_FUNCREF_4BIT
+
+OPENSSL_INLINE int gcm_pmull_capable(void) {  // Gates the gcm_*_v8 code.
+  return CRYPTO_is_ARMv8_PMULL_capable();
+}
+
+void gcm_init_v8(u128 Htable[16], const uint64_t Xi[2]);
+void gcm_gmult_v8(uint64_t Xi[2], const u128 Htable[16]);
+void gcm_ghash_v8(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
+                  size_t len);
+
+#if defined(OPENSSL_ARM)
+// 32-bit ARM also has NEON GHASH code; gcm.c falls back to it without PMULL.
+OPENSSL_INLINE int gcm_neon_capable(void) { return CRYPTO_is_NEON_capable(); }
+
+void gcm_init_neon(u128 Htable[16], const uint64_t Xi[2]);
+void gcm_gmult_neon(uint64_t Xi[2], const u128 Htable[16]);
+void gcm_ghash_neon(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
+                    size_t len);
+#else
+// AArch64 only has the ARMv8 versions; the NEON names below are stubs.
+OPENSSL_INLINE int gcm_neon_capable(void) { return 0; }  // No NEON GHASH.
+OPENSSL_INLINE void gcm_init_neon(u128 Htable[16], const uint64_t Xi[2]) {
+  abort();  // Stub: aborts if called; |gcm_neon_capable| is 0 here.
+}
+OPENSSL_INLINE void gcm_gmult_neon(uint64_t Xi[2], const u128 Htable[16]) {
+  abort();  // Stub: aborts if called; |gcm_neon_capable| is 0 here.
+}
+OPENSSL_INLINE void gcm_ghash_neon(uint64_t Xi[2], const u128 Htable[16],
+                                   const uint8_t *inp, size_t len) {
+  abort();  // Stub: aborts if called; |gcm_neon_capable| is 0 here.
+}
+#endif  // OPENSSL_ARM
+
+#endif  // __ARM_ARCH__ >= 7
+#elif defined(OPENSSL_PPC64LE)
+#define GHASH_ASM_PPC64LE
+#define GCM_FUNCREF_4BIT
+void gcm_init_p8(u128 Htable[16], const uint64_t Xi[2]);
+void gcm_gmult_p8(uint64_t Xi[2], const u128 Htable[16]);
+void gcm_ghash_p8(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
+                  size_t len);
+#endif
+#endif  // GHASH_ASM
+
+
 // CCM.
 
 typedef struct ccm128_context {