diff --git a/crypto/CMakeLists.txt b/crypto/CMakeLists.txt
index e4b559a..6991ac9 100644
--- a/crypto/CMakeLists.txt
+++ b/crypto/CMakeLists.txt
@@ -80,7 +80,6 @@
 add_subdirectory(pool)
 
 # Level 0.2 - depends on nothing but itself
-add_subdirectory(modes)
 add_subdirectory(des)
 add_subdirectory(rc4)
 add_subdirectory(conf)
@@ -169,7 +168,6 @@
   $<TARGET_OBJECTS:fipsmodule>
   $<TARGET_OBJECTS:digest_extra>
   $<TARGET_OBJECTS:cipher>
-  $<TARGET_OBJECTS:modes>
   $<TARGET_OBJECTS:des>
   $<TARGET_OBJECTS:rc4>
   $<TARGET_OBJECTS:conf>
diff --git a/crypto/cipher/e_aes.c b/crypto/cipher/e_aes.c
index 861a563..a487cd9 100644
--- a/crypto/cipher/e_aes.c
+++ b/crypto/cipher/e_aes.c
@@ -59,7 +59,7 @@
 
 #include "internal.h"
 #include "../internal.h"
-#include "../modes/internal.h"
+#include "../fipsmodule/modes/internal.h"
 
 #if defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)
 #include <openssl/arm_arch.h>
diff --git a/crypto/cipher/internal.h b/crypto/cipher/internal.h
index 549412c..595ee87 100644
--- a/crypto/cipher/internal.h
+++ b/crypto/cipher/internal.h
@@ -62,7 +62,7 @@
 #include <openssl/aead.h>
 #include <openssl/aes.h>
 
-#include "../modes/internal.h"
+#include "../fipsmodule/modes/internal.h"
 
 #if defined(__cplusplus)
 extern "C" {
diff --git a/crypto/fipsmodule/CMakeLists.txt b/crypto/fipsmodule/CMakeLists.txt
index 4db44a9..375a3db 100644
--- a/crypto/fipsmodule/CMakeLists.txt
+++ b/crypto/fipsmodule/CMakeLists.txt
@@ -12,6 +12,8 @@
     aesni-x86_64.${ASM_EXT}
     bsaes-x86_64.${ASM_EXT}
     vpaes-x86_64.${ASM_EXT}
+    aesni-gcm-x86_64.${ASM_EXT}
+    ghash-x86_64.${ASM_EXT}
   )
 endif()
 
@@ -26,6 +28,7 @@
     aes-586.${ASM_EXT}
     vpaes-x86.${ASM_EXT}
     aesni-x86.${ASM_EXT}
+    ghash-x86.${ASM_EXT}
   )
 endif()
 
@@ -39,6 +42,8 @@
     aes-armv4.${ASM_EXT}
     bsaes-armv7.${ASM_EXT}
     aesv8-armx.${ASM_EXT}
+    ghash-armv4.${ASM_EXT}
+    ghashv8-armx.${ASM_EXT}
   )
 endif()
 
@@ -50,6 +55,7 @@
     sha256-armv8.${ASM_EXT}
     sha512-armv8.${ASM_EXT}
     aesv8-armx.${ASM_EXT}
+    ghashv8-armx.${ASM_EXT}
   )
 endif()
 
@@ -58,11 +64,13 @@
     AES_ARCH_SOURCES
 
     aesp8-ppc.${ASM_EXT}
+    ghashp8-ppc.${ASM_EXT}
   )
 endif()
 
 perlasm(aes-586.${ASM_EXT} aes/asm/aes-586.pl)
 perlasm(aes-armv4.${ASM_EXT} aes/asm/aes-armv4.pl)
+perlasm(aesni-gcm-x86_64.${ASM_EXT} modes/asm/aesni-gcm-x86_64.pl)
 perlasm(aesni-x86_64.${ASM_EXT} aes/asm/aesni-x86_64.pl)
 perlasm(aesni-x86.${ASM_EXT} aes/asm/aesni-x86.pl)
 perlasm(aesp8-ppc.${ASM_EXT} aes/asm/aesp8-ppc.pl)
@@ -70,6 +78,11 @@
 perlasm(aes-x86_64.${ASM_EXT} aes/asm/aes-x86_64.pl)
 perlasm(bsaes-armv7.${ASM_EXT} aes/asm/bsaes-armv7.pl)
 perlasm(bsaes-x86_64.${ASM_EXT} aes/asm/bsaes-x86_64.pl)
+perlasm(ghash-armv4.${ASM_EXT} modes/asm/ghash-armv4.pl)
+perlasm(ghashp8-ppc.${ASM_EXT} modes/asm/ghashp8-ppc.pl)
+perlasm(ghashv8-armx.${ASM_EXT} modes/asm/ghashv8-armx.pl)
+perlasm(ghash-x86_64.${ASM_EXT} modes/asm/ghash-x86_64.pl)
+perlasm(ghash-x86.${ASM_EXT} modes/asm/ghash-x86.pl)
 perlasm(md5-586.${ASM_EXT} md5/asm/md5-586.pl)
 perlasm(md5-x86_64.${ASM_EXT} md5/asm/md5-x86_64.pl)
 perlasm(sha1-586.${ASM_EXT} sha/asm/sha1-586.pl)
@@ -174,3 +187,14 @@
 
 target_link_libraries(aes_test crypto)
 add_dependencies(all_tests aes_test)
+
+add_executable(
+  gcm_test
+
+  modes/gcm_test.cc
+
+  $<TARGET_OBJECTS:test_support>
+)
+
+target_link_libraries(gcm_test crypto)
+add_dependencies(all_tests gcm_test)
diff --git a/crypto/fipsmodule/aes/aes.c b/crypto/fipsmodule/aes/aes.c
index cd53a46..edd866c 100644
--- a/crypto/fipsmodule/aes/aes.c
+++ b/crypto/fipsmodule/aes/aes.c
@@ -53,7 +53,7 @@
 
 #include <openssl/cpu.h>
 
-#include "../../modes/internal.h"
+#include "../modes/internal.h"
 
 
 #if defined(OPENSSL_NO_ASM) || \
diff --git a/crypto/fipsmodule/aes/mode_wrappers.c b/crypto/fipsmodule/aes/mode_wrappers.c
index 81e77a4..4929920 100644
--- a/crypto/fipsmodule/aes/mode_wrappers.c
+++ b/crypto/fipsmodule/aes/mode_wrappers.c
@@ -50,7 +50,7 @@
 
 #include <assert.h>
 
-#include "../../modes/internal.h"
+#include "../modes/internal.h"
 
 
 void AES_ctr128_encrypt(const uint8_t *in, uint8_t *out, size_t len,
diff --git a/crypto/fipsmodule/bcm.c b/crypto/fipsmodule/bcm.c
index aeb63e3..b17a7d8 100644
--- a/crypto/fipsmodule/bcm.c
+++ b/crypto/fipsmodule/bcm.c
@@ -35,6 +35,12 @@
 #include "hmac/hmac.c"
 #include "md4/md4.c"
 #include "md5/md5.c"
+#include "modes/cbc.c"
+#include "modes/cfb.c"
+#include "modes/ctr.c"
+#include "modes/gcm.c"
+#include "modes/ofb.c"
+#include "modes/polyval.c"
 #include "sha/sha1-altivec.c"
 #include "sha/sha1.c"
 #include "sha/sha256.c"
diff --git a/crypto/fipsmodule/delocate.go b/crypto/fipsmodule/delocate.go
index ebf9332..3de9ae3 100644
--- a/crypto/fipsmodule/delocate.go
+++ b/crypto/fipsmodule/delocate.go
@@ -122,7 +122,7 @@
 		return false
 	}
 	i += len(symbol)
-	return i == len(line) || line[i] == '+' || line[i] == '('
+	return i == len(line) || line[i] == '+' || line[i] == '(' || line[i] == '@'
 }
 
 // threadLocalOffsetFunc describes a function that fetches the offset to symbol
@@ -160,6 +160,11 @@
 	threadLocalOffsets := make(map[string]threadLocalOffsetFunc)
 
 	for lineNo, line := range lines {
+		// References to OPENSSL_ia32cap_P via the GOT result from C
+		// code. The OPENSSL_ia32cap_addr symbol, generated by this
+		// script, is just like a GOT entry, but at a known offset.
+		line = strings.Replace(line, "OPENSSL_ia32cap_P@GOTPCREL(%rip)", "OPENSSL_ia32cap_addr(%rip)", -1)
+
 		if referencesIA32CapDirectly(line) {
 			panic("reference to OPENSSL_ia32cap_P needs to be changed to indirect via OPENSSL_ia32cap_addr")
 		}
@@ -168,6 +173,7 @@
 			ia32capAddrNeeded = true
 		}
 
+		line = strings.Replace(line, "@PLT", "", -1)
 		parts := strings.Fields(strings.TrimSpace(line))
 
 		if len(parts) == 0 {
@@ -184,10 +190,10 @@
 				continue
 			}
 
-			if strings.HasSuffix(target, "_bss_get@PLT") || strings.HasSuffix(target, "_bss_get") {
+			if strings.HasSuffix(target, "_bss_get") {
 				// reference to a synthesised function. Don't
-				// indirect ourselves and drop PLT indirection.
-				ret = append(ret, strings.Replace(line, "@PLT", "", 1))
+				// indirect it.
+				ret = append(ret, line)
 				continue
 			}
 
@@ -201,27 +207,33 @@
 			}
 
 			redirectorName := "bcm_redirector_" + target
-
-			if strings.HasSuffix(target, "@PLT") {
-				withoutPLT := target[:len(target)-4]
-				if isGlobal, ok := symbols[withoutPLT]; ok {
-					newTarget := withoutPLT
-					if isGlobal {
-						newTarget = localTargetName(withoutPLT)
-					}
-					ret = append(ret, fmt.Sprintf("\t%s %s", parts[0], newTarget))
-					continue
-				}
-
-				redirectorName = redirectorName[:len(redirectorName)-4]
-			}
-
 			ret = append(ret, fmt.Sprintf("\t%s %s", parts[0], redirectorName))
 			redirectors[redirectorName] = target
 			continue
 
-		case "leaq":
-			if strings.Contains(line, "BORINGSSL_bcm_text_dummy_") {
+		case "leaq", "movq", "cmpq":
+			if parts[0] == "movq" && strings.Contains(line, "@GOTTPOFF(%rip)") {
+				// GOTTPOFF are offsets into the thread-local
+				// storage that are stored in the GOT. We have
+				// to move these relocations out of the module,
+				// but do not know whether rax is live at this
+				// point. Thus a normal function call might
+				// clobber a register and so we synthesize
+				// different functions for writing to each
+				// target register.
+				//
+				// (BoringSSL itself does not use __thread
+				// variables, but ASAN and MSAN may add these
+				// references for their bookkeeping.)
+				targetRegister := parts[2][1:]
+				symbol := strings.SplitN(parts[1], "@", 2)[0]
+				functionName := fmt.Sprintf("BORINGSSL_bcm_tpoff_to_%s_for_%s", targetRegister, symbol)
+				threadLocalOffsets[functionName] = threadLocalOffsetFunc{target: targetRegister, symbol: symbol}
+				ret = append(ret, "\tcallq "+functionName+"\n")
+				continue
+			}
+
+			if parts[0] == "leaq" {
 				line = strings.Replace(line, "BORINGSSL_bcm_text_dummy_", "BORINGSSL_bcm_text_", -1)
 			}
 
@@ -231,35 +243,27 @@
 				if isGlobal := symbols[target]; isGlobal {
 					line = strings.Replace(line, target, localTargetName(target), 1)
 				}
+
+				if strings.Contains(line, "@GOTPCREL") && parts[0] == "movq" {
+					line = strings.Replace(line, "@GOTPCREL", "", -1)
+					target = strings.Replace(target, "@GOTPCREL", "", -1)
+
+					if isGlobal := symbols[target]; isGlobal {
+						line = strings.Replace(line, target, localTargetName(target), 1)
+					}
+
+					// A GOT load yields the address of the
+					// symbol. Now that the symbol is
+					// referenced directly, a movq would
+					// read the bytes stored at it instead,
+					// so it must be turned into an LEA.
+					line = strings.Replace(line, "movq", "leaq", 1)
+				}
 			}
 
 			ret = append(ret, line)
 			continue
 
-		case "movq":
-			if !strings.Contains(line, "@GOTTPOFF(%rip)") {
-				ret = append(ret, line)
-				continue
-			}
-
-			// GOTTPOFF are offsets into the thread-local storage
-			// that are stored in the GOT. We have to move these
-			// relocations out of the module, but do not know
-			// whether rax is live at this point. Thus a normal
-			// function call might clobber a register and so we
-			// synthesize different functions for writing to each
-			// target register.
-			//
-			// (BoringSSL itself does not use __thread variables,
-			// but ASAN and MSAN may add these references for their
-			// bookkeeping.)
-			targetRegister := parts[2][1:]
-			symbol := strings.SplitN(parts[1], "@", 2)[0]
-			functionName := fmt.Sprintf("BORINGSSL_bcm_tpoff_to_%s_for_%s", targetRegister, symbol)
-			threadLocalOffsets[functionName] = threadLocalOffsetFunc{target: targetRegister, symbol: symbol}
-			ret = append(ret, "\tcallq "+functionName+"\n")
-			continue
-
 		case ".file":
 			// Do not reorder .file directives. These define
 			// numbered files which are referenced by other debug
@@ -340,7 +344,7 @@
 	for _, name := range redirectorNames {
 		ret = append(ret, ".type "+name+", @function")
 		ret = append(ret, name+":")
-		ret = append(ret, "\tjmp "+redirectors[name])
+		ret = append(ret, "\tjmp "+redirectors[name]+"@PLT")
 	}
 
 	// Emit BSS accessor functions. Each is a single LEA followed by RET.
diff --git a/crypto/modes/asm/aesni-gcm-x86_64.pl b/crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl
similarity index 99%
rename from crypto/modes/asm/aesni-gcm-x86_64.pl
rename to crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl
index 139014f..57a6a8d 100644
--- a/crypto/modes/asm/aesni-gcm-x86_64.pl
+++ b/crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl
@@ -39,7 +39,7 @@
 
 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
 ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
-( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
+( $xlate="${dir}../../../perlasm/x86_64-xlate.pl" and -f $xlate) or
 die "can't locate x86_64-xlate.pl";
 
 # |$avx| in ghash-x86_64.pl must be set to at least 1; otherwise tags will
diff --git a/crypto/modes/asm/ghash-armv4.pl b/crypto/fipsmodule/modes/asm/ghash-armv4.pl
similarity index 99%
rename from crypto/modes/asm/ghash-armv4.pl
rename to crypto/fipsmodule/modes/asm/ghash-armv4.pl
index 1a03251..183fe60 100644
--- a/crypto/modes/asm/ghash-armv4.pl
+++ b/crypto/fipsmodule/modes/asm/ghash-armv4.pl
@@ -78,7 +78,7 @@
 if ($flavour && $flavour ne "void") {
     $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
     ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
-    ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+    ( $xlate="${dir}../../../perlasm/arm-xlate.pl" and -f $xlate) or
     die "can't locate arm-xlate.pl";
 
     open STDOUT,"| \"$^X\" $xlate $flavour $output";
diff --git a/crypto/modes/asm/ghash-x86.pl b/crypto/fipsmodule/modes/asm/ghash-x86.pl
similarity index 99%
rename from crypto/modes/asm/ghash-x86.pl
rename to crypto/fipsmodule/modes/asm/ghash-x86.pl
index d3a79e1..dab88ef 100644
--- a/crypto/modes/asm/ghash-x86.pl
+++ b/crypto/fipsmodule/modes/asm/ghash-x86.pl
@@ -126,7 +126,7 @@
 # Westmere, 1.95 - on Sandy/Ivy Bridge, 1.76 - on Bulldozer.
 
 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
-push(@INC,"${dir}","${dir}../../perlasm");
+push(@INC,"${dir}","${dir}../../../perlasm");
 require "x86asm.pl";
 
 $output=pop;
diff --git a/crypto/modes/asm/ghash-x86_64.pl b/crypto/fipsmodule/modes/asm/ghash-x86_64.pl
similarity index 99%
rename from crypto/modes/asm/ghash-x86_64.pl
rename to crypto/fipsmodule/modes/asm/ghash-x86_64.pl
index 0e6e348..1a74edf 100644
--- a/crypto/modes/asm/ghash-x86_64.pl
+++ b/crypto/fipsmodule/modes/asm/ghash-x86_64.pl
@@ -89,7 +89,7 @@
 
 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
 ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
-( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
+( $xlate="${dir}../../../perlasm/x86_64-xlate.pl" and -f $xlate) or
 die "can't locate x86_64-xlate.pl";
 
 # See the notes about |$avx| in aesni-gcm-x86_64.pl; otherwise tags will be
@@ -212,7 +212,7 @@
 
 $code=<<___;
 .text
-.extern	OPENSSL_ia32cap_P
+.extern	OPENSSL_ia32cap_addr
 
 .globl	gcm_gmult_4bit
 .type	gcm_gmult_4bit,\@function,2
@@ -644,7 +644,8 @@
 my ($Xl,$Xm,$Xh,$Hkey3,$Hkey4)=map("%xmm$_",(11..15));
 
 $code.=<<___;
-	mov		OPENSSL_ia32cap_P+4(%rip),%eax
+	mov		OPENSSL_ia32cap_addr(%rip),%rax
+	mov		4(%rax),%eax
 	cmp		\$0x30,$len
 	jb		.Lskip4x
 
diff --git a/crypto/modes/asm/ghashp8-ppc.pl b/crypto/fipsmodule/modes/asm/ghashp8-ppc.pl
similarity index 99%
rename from crypto/modes/asm/ghashp8-ppc.pl
rename to crypto/fipsmodule/modes/asm/ghashp8-ppc.pl
index f0598cb..c46cdb5 100644
--- a/crypto/modes/asm/ghashp8-ppc.pl
+++ b/crypto/fipsmodule/modes/asm/ghashp8-ppc.pl
@@ -57,7 +57,7 @@
 
 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
 ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
-( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
+( $xlate="${dir}../../../perlasm/ppc-xlate.pl" and -f $xlate) or
 die "can't locate ppc-xlate.pl";
 
 open STDOUT,"| $^X $xlate $flavour $output" || die "can't call $xlate: $!";
diff --git a/crypto/modes/asm/ghashv8-armx.pl b/crypto/fipsmodule/modes/asm/ghashv8-armx.pl
similarity index 99%
rename from crypto/modes/asm/ghashv8-armx.pl
rename to crypto/fipsmodule/modes/asm/ghashv8-armx.pl
index 15fc478..452bd63 100644
--- a/crypto/modes/asm/ghashv8-armx.pl
+++ b/crypto/fipsmodule/modes/asm/ghashv8-armx.pl
@@ -36,7 +36,7 @@
 
 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
 ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
-( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+( $xlate="${dir}../../../perlasm/arm-xlate.pl" and -f $xlate) or
 die "can't locate arm-xlate.pl";
 
 open OUT,"| \"$^X\" $xlate $flavour $output";
diff --git a/crypto/modes/cbc.c b/crypto/fipsmodule/modes/cbc.c
similarity index 100%
rename from crypto/modes/cbc.c
rename to crypto/fipsmodule/modes/cbc.c
diff --git a/crypto/modes/cfb.c b/crypto/fipsmodule/modes/cfb.c
similarity index 99%
rename from crypto/modes/cfb.c
rename to crypto/fipsmodule/modes/cfb.c
index af15255..836eb3f 100644
--- a/crypto/modes/cfb.c
+++ b/crypto/fipsmodule/modes/cfb.c
@@ -54,7 +54,7 @@
 #include "internal.h"
 
 
-OPENSSL_COMPILE_ASSERT((16 % sizeof(size_t)) == 0, bad_size_t_size);
+OPENSSL_COMPILE_ASSERT((16 % sizeof(size_t)) == 0, bad_size_t_size_cfb);
 
 void CRYPTO_cfb128_encrypt(const uint8_t *in, uint8_t *out, size_t len,
                            const void *key, uint8_t ivec[16], unsigned *num,
diff --git a/crypto/modes/ctr.c b/crypto/fipsmodule/modes/ctr.c
similarity index 99%
rename from crypto/modes/ctr.c
rename to crypto/fipsmodule/modes/ctr.c
index c026d15..a191f39 100644
--- a/crypto/modes/ctr.c
+++ b/crypto/fipsmodule/modes/ctr.c
@@ -69,7 +69,7 @@
   } while (n);
 }
 
-OPENSSL_COMPILE_ASSERT((16 % sizeof(size_t)) == 0, bad_size_t_size);
+OPENSSL_COMPILE_ASSERT((16 % sizeof(size_t)) == 0, bad_size_t_size_ctr);
 
 /* The input encrypted as though 128bit counter mode is being used.  The extra
  * state information to record how much of the 128bit block we have used is
diff --git a/crypto/modes/gcm.c b/crypto/fipsmodule/modes/gcm.c
similarity index 97%
rename from crypto/modes/gcm.c
rename to crypto/fipsmodule/modes/gcm.c
index 1330ad6..39021eb 100644
--- a/crypto/modes/gcm.c
+++ b/crypto/fipsmodule/modes/gcm.c
@@ -55,8 +55,7 @@
 #include <openssl/cpu.h>
 
 #include "internal.h"
-#include "../internal.h"
-
+#include "../../internal.h"
 
 #if !defined(OPENSSL_NO_ASM) &&                         \
     (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \
@@ -270,11 +269,6 @@
 void gcm_ghash_avx(uint64_t Xi[2], const u128 Htable[16], const uint8_t *in,
                    size_t len);
 #define AESNI_GCM
-static int aesni_gcm_enabled(GCM128_CONTEXT *ctx, ctr128_f stream) {
-  return stream == aesni_ctr32_encrypt_blocks &&
-         ctx->ghash == gcm_ghash_avx;
-}
-
 size_t aesni_gcm_encrypt(const uint8_t *in, uint8_t *out, size_t len,
                          const void *key, uint8_t ivec[16], uint64_t *Xi);
 size_t aesni_gcm_decrypt(const uint8_t *in, uint8_t *out, size_t len,
@@ -352,7 +346,10 @@
 
 void CRYPTO_ghash_init(gmult_func *out_mult, ghash_func *out_hash,
                        u128 *out_key, u128 out_table[16],
+                       int *out_use_aesni_gcm_crypt,
                        const uint8_t *gcm_key) {
+  /* Cleared by default; only the AVX GHASH path below sets it. */
+  *out_use_aesni_gcm_crypt = 0;
   union {
     uint64_t u[2];
     uint8_t c[16];
@@ -372,6 +369,7 @@
       gcm_init_avx(out_table, H.u);
       *out_mult = gcm_gmult_avx;
       *out_hash = gcm_ghash_avx;
+      *out_use_aesni_gcm_crypt = 1;
       return;
     }
     gcm_init_clmul(out_table, H.u);
@@ -428,7 +426,11 @@
   OPENSSL_memset(gcm_key, 0, sizeof(gcm_key));
   (*block)(gcm_key, gcm_key, aes_key);
 
-  CRYPTO_ghash_init(&ctx->gmult, &ctx->ghash, &ctx->H, ctx->Htable, gcm_key);
+  int use_aesni_gcm_crypt;
+  CRYPTO_ghash_init(&ctx->gmult, &ctx->ghash, &ctx->H, ctx->Htable,
+                    &use_aesni_gcm_crypt, gcm_key);
+
+  ctx->use_aesni_gcm_crypt = use_aesni_gcm_crypt ? 1 : 0;
 }
 
 void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const void *key,
@@ -858,7 +860,7 @@
   }
 
 #if defined(AESNI_GCM)
-  if (aesni_gcm_enabled(ctx, stream)) {
+  if (ctx->use_aesni_gcm_crypt) {
     /* |aesni_gcm_encrypt| may not process all the input given to it. It may
      * not process *any* of its input if it is deemed too small. */
     size_t bulk = aesni_gcm_encrypt(in, out, len, key, ctx->Yi.c, ctx->Xi.u);
@@ -961,7 +963,7 @@
   }
 
 #if defined(AESNI_GCM)
-  if (aesni_gcm_enabled(ctx, stream)) {
+  if (ctx->use_aesni_gcm_crypt) {
     /* |aesni_gcm_decrypt| may not process all the input given to it. It may
      * not process *any* of its input if it is deemed too small. */
     size_t bulk = aesni_gcm_decrypt(in, out, len, key, ctx->Yi.c, ctx->Xi.u);
@@ -1062,8 +1064,8 @@
 #if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
 int crypto_gcm_clmul_enabled(void) {
 #ifdef GHASH_ASM
-  return OPENSSL_ia32cap_P[0] & (1 << 24) &&  /* check FXSR bit */
-    OPENSSL_ia32cap_P[1] & (1 << 1);  /* check PCLMULQDQ bit */
+  return (OPENSSL_ia32cap_P[0] & (1 << 24)) && /* check FXSR bit */
+         (OPENSSL_ia32cap_P[1] & (1 << 1));    /* check PCLMULQDQ bit */
 #else
   return 0;
 #endif
diff --git a/crypto/modes/gcm_test.cc b/crypto/fipsmodule/modes/gcm_test.cc
similarity index 99%
rename from crypto/modes/gcm_test.cc
rename to crypto/fipsmodule/modes/gcm_test.cc
index 1a5e034..bf40807 100644
--- a/crypto/modes/gcm_test.cc
+++ b/crypto/fipsmodule/modes/gcm_test.cc
@@ -61,7 +61,7 @@
 #include <openssl/mem.h>
 
 #include "internal.h"
-#include "../test/test_util.h"
+#include "../../test/test_util.h"
 
 
 struct test_case {
diff --git a/crypto/modes/internal.h b/crypto/fipsmodule/modes/internal.h
similarity index 96%
rename from crypto/modes/internal.h
rename to crypto/fipsmodule/modes/internal.h
index 94072ec..898c10b 100644
--- a/crypto/modes/internal.h
+++ b/crypto/fipsmodule/modes/internal.h
@@ -53,15 +53,13 @@
 
 #include <string.h>
 
-#include "../internal.h"
+#include "../../internal.h"
 
 #if defined(__cplusplus)
 extern "C" {
 #endif
 
 
-#define asm __asm__
-
 #define STRICT_ALIGNMENT 1
 #if defined(OPENSSL_X86_64) || defined(OPENSSL_X86) || defined(OPENSSL_AARCH64)
 #undef STRICT_ALIGNMENT
@@ -159,6 +157,10 @@
 
   unsigned int mres, ares;
   block128_f block;
+
+  /* use_aesni_gcm_crypt is true if this context should use the assembly
+   * functions |aesni_gcm_encrypt| and |aesni_gcm_decrypt| to process data. */
+  unsigned use_aesni_gcm_crypt:1;
 };
 
 #if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
@@ -212,10 +214,12 @@
 
 /* CRYPTO_ghash_init writes a precomputed table of powers of |gcm_key| to
  * |out_table| and sets |*out_mult| and |*out_hash| to (potentially hardware
- * accelerated) functions for performing operations in the GHASH field. */
-void CRYPTO_ghash_init(gmult_func *out_mult, ghash_func *out_hash,
-                       u128 *out_key, u128 out_table[16],
-                       const uint8_t *gcm_key);
+ * accelerated) functions for performing operations in the GHASH field. If the
+ * assembly functions |aesni_gcm_encrypt| and |aesni_gcm_decrypt| can be used,
+ * |*out_use_aesni_gcm_crypt| will be true. */
+void CRYPTO_ghash_init(gmult_func *out_mult, ghash_func *out_hash,
+                       u128 *out_key, u128 out_table[16],
+                       int *out_use_aesni_gcm_crypt, const uint8_t *gcm_key);
 
 /* CRYPTO_gcm128_init initialises |ctx| to use |block| (typically AES) with
  * the given key. */
diff --git a/crypto/modes/ofb.c b/crypto/fipsmodule/modes/ofb.c
similarity index 99%
rename from crypto/modes/ofb.c
rename to crypto/fipsmodule/modes/ofb.c
index 95d15c3..63bba68 100644
--- a/crypto/modes/ofb.c
+++ b/crypto/fipsmodule/modes/ofb.c
@@ -54,7 +54,7 @@
 #include "internal.h"
 
 
-OPENSSL_COMPILE_ASSERT((16 % sizeof(size_t)) == 0, bad_size_t_size);
+OPENSSL_COMPILE_ASSERT((16 % sizeof(size_t)) == 0, bad_size_t_size_ofb);
 
 void CRYPTO_ofb128_encrypt(const uint8_t *in, uint8_t *out, size_t len,
                            const void *key, uint8_t ivec[16], unsigned *num,
diff --git a/crypto/modes/polyval.c b/crypto/fipsmodule/modes/polyval.c
similarity index 96%
rename from crypto/modes/polyval.c
rename to crypto/fipsmodule/modes/polyval.c
index 83df0ab..0f091bc 100644
--- a/crypto/modes/polyval.c
+++ b/crypto/fipsmodule/modes/polyval.c
@@ -20,7 +20,7 @@
 #include <string.h>
 
 #include "internal.h"
-#include "../internal.h"
+#include "../../internal.h"
 
 
 /* byte_reverse reverses the order of the bytes in |b->c|. */
@@ -57,7 +57,9 @@
   OPENSSL_memcpy(H.c, key, 16);
   reverse_and_mulX_ghash(&H);
 
-  CRYPTO_ghash_init(&ctx->gmult, &ctx->ghash, &ctx->H, ctx->Htable, H.c);
+  int unused;
+  CRYPTO_ghash_init(&ctx->gmult, &ctx->ghash, &ctx->H, ctx->Htable, &unused,
+                    H.c);
   OPENSSL_memset(&ctx->S, 0, sizeof(ctx->S));
 }
 
diff --git a/crypto/modes/CMakeLists.txt b/crypto/modes/CMakeLists.txt
deleted file mode 100644
index dc9e504..0000000
--- a/crypto/modes/CMakeLists.txt
+++ /dev/null
@@ -1,76 +0,0 @@
-include_directories(../../include)
-
-if (${ARCH} STREQUAL "x86_64")
-  set(
-    MODES_ARCH_SOURCES
-
-    aesni-gcm-x86_64.${ASM_EXT}
-    ghash-x86_64.${ASM_EXT}
-  )
-endif()
-
-if (${ARCH} STREQUAL "x86")
-  set(
-    MODES_ARCH_SOURCES
-
-    ghash-x86.${ASM_EXT}
-  )
-endif()
-
-if (${ARCH} STREQUAL "arm")
-  set(
-    MODES_ARCH_SOURCES
-
-    ghash-armv4.${ASM_EXT}
-    ghashv8-armx.${ASM_EXT}
-  )
-endif()
-
-if (${ARCH} STREQUAL "aarch64")
-  set(
-    MODES_ARCH_SOURCES
-
-    ghashv8-armx.${ASM_EXT}
-  )
-endif()
-
-if (${ARCH} STREQUAL "ppc64le")
-  set(
-    MODES_ARCH_SOURCES
-
-    ghashp8-ppc.${ASM_EXT}
-  )
-endif()
-
-add_library(
-  modes
-
-  OBJECT
-
-  cbc.c
-  cfb.c
-  ctr.c
-  gcm.c
-  ofb.c
-  polyval.c
-
-  ${MODES_ARCH_SOURCES}
-)
-
-perlasm(aesni-gcm-x86_64.${ASM_EXT} asm/aesni-gcm-x86_64.pl)
-perlasm(ghash-x86_64.${ASM_EXT} asm/ghash-x86_64.pl)
-perlasm(ghash-x86.${ASM_EXT} asm/ghash-x86.pl)
-perlasm(ghash-armv4.${ASM_EXT} asm/ghash-armv4.pl)
-perlasm(ghashv8-armx.${ASM_EXT} asm/ghashv8-armx.pl)
-perlasm(ghashp8-ppc.${ASM_EXT} asm/ghashp8-ppc.pl)
-
-add_executable(
-  gcm_test
-
-  gcm_test.cc
-
-  $<TARGET_OBJECTS:test_support>
-)
-
-target_link_libraries(gcm_test crypto)
-add_dependencies(all_tests gcm_test)
diff --git a/crypto/rand/internal.h b/crypto/rand/internal.h
index 58ffaaa..349daf0 100644
--- a/crypto/rand/internal.h
+++ b/crypto/rand/internal.h
@@ -18,7 +18,7 @@
 #include <openssl/aes.h>
 
 #include "../internal.h"
-#include "../modes/internal.h"
+#include "../fipsmodule/modes/internal.h"
 
 #if defined(__cplusplus)
 extern "C" {
diff --git a/decrepit/xts/xts.c b/decrepit/xts/xts.c
index 2811445..b0eb572 100644
--- a/decrepit/xts/xts.c
+++ b/decrepit/xts/xts.c
@@ -53,7 +53,7 @@
 #include <openssl/aes.h>
 #include <openssl/cipher.h>
 
-#include "../crypto/modes/internal.h"
+#include "../crypto/fipsmodule/modes/internal.h"
 
 
 typedef struct xts128_context {
diff --git a/util/all_tests.json b/util/all_tests.json
index 0f0a589..3f7a501 100644
--- a/util/all_tests.json
+++ b/util/all_tests.json
@@ -45,7 +45,7 @@
 	["crypto/hkdf/hkdf_test"],
 	["crypto/hmac_extra/hmac_test", "crypto/hmac_extra/hmac_tests.txt"],
 	["crypto/lhash/lhash_test"],
-	["crypto/modes/gcm_test"],
+	["crypto/fipsmodule/gcm_test"],
 	["crypto/obj/obj_test"],
 	["crypto/pkcs7/pkcs7_test"],
 	["crypto/pkcs8/pkcs12_test"],
