Support delocating vpbroadcastq.

(This can be generated with -mavx2.)

Change-Id: I6d92d9e93eb448357342ef86d050321f0ef40f9e
Reviewed-on: https://boringssl-review.googlesource.com/24504
Commit-Queue: David Benjamin <davidben@google.com>
Reviewed-by: David Benjamin <davidben@google.com>
CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
diff --git a/util/fipstools/delocate.go b/util/fipstools/delocate.go
index 6d56f92..2c1ba49 100644
--- a/util/fipstools/delocate.go
+++ b/util/fipstools/delocate.go
@@ -792,6 +792,9 @@
 const (
 	instrPush instructionType = iota
 	instrMove
+	// instrTransformingMove is essentially a move, but it performs some
+	// transformation of the data during the process.
+	instrTransformingMove
 	instrJump
 	instrConditionalMove
 	instrOther
@@ -818,6 +821,11 @@
 		if len(args) == 1 {
 			return instrJump
 		}
+
+	case "vpbroadcastq":
+		if len(args) == 2 {
+			return instrTransformingMove
+		}
 	}
 
 	return instrOther
@@ -870,6 +878,13 @@
 	}
 }
 
+func finalTransform(w stringWriter, transformInstruction, reg string) wrapperFunc {
+	return func(k func()) { // wrapper: run k, then write "<transformInstruction> reg, reg" to w
+		k() // k runs first, so the transform line is written after whatever k emits
+		w.WriteString("\t" + transformInstruction + " " + reg + ", " + reg + "\n") // reg is both source and destination
+	}
+}
+
 func isValidLEATarget(reg string) bool {
 	return !strings.HasPrefix(reg, "%xmm") && !strings.HasPrefix(reg, "%ymm") && !strings.HasPrefix(reg, "%zmm")
 }
@@ -1018,6 +1033,13 @@
 				case instrMove:
 					assertNodeType(argNodes[1], ruleRegisterOrConstant)
 					targetReg = d.contents(argNodes[1])
+				case instrTransformingMove:
+					assertNodeType(argNodes[1], ruleRegisterOrConstant)
+					targetReg = d.contents(argNodes[1])
+					wrappers = append(wrappers, finalTransform(d.output, instructionName, targetReg)) // re-applies the instruction register-to-register after the load
+					if isValidLEATarget(targetReg) { // true for any target that is not %xmm/%ymm/%zmm
+						return nil, fmt.Errorf("Currently transforming moves are assumed to target XMM registers. Otherwise we'll pop %%rax before reading it to do the transform.") // "%%" prints a literal '%'
+					}
 				default:
 					return nil, fmt.Errorf("Cannot rewrite GOTPCREL reference for instruction %q", instructionName)
 				}
diff --git a/util/fipstools/testdata/x86_64-GOTRewrite/in.s b/util/fipstools/testdata/x86_64-GOTRewrite/in.s
index 9b4b201..0f9c70e 100644
--- a/util/fipstools/testdata/x86_64-GOTRewrite/in.s
+++ b/util/fipstools/testdata/x86_64-GOTRewrite/in.s
@@ -34,4 +34,8 @@
 	movq foobar_bss_get@GOTPCREL(%rip), %r11
 	movq OPENSSL_ia32cap_get@GOTPCREL(%rip), %r11
 
+	# Transforming moves run the transform in-place after the load.
+	vpbroadcastq stderr@GOTPCREL(%rip), %xmm0
+	vpbroadcastq foo@GOTPCREL(%rip), %xmm0
+
 .comm foobar,64,32
diff --git a/util/fipstools/testdata/x86_64-GOTRewrite/out.s b/util/fipstools/testdata/x86_64-GOTRewrite/out.s
index 0e07721..0420af6 100644
--- a/util/fipstools/testdata/x86_64-GOTRewrite/out.s
+++ b/util/fipstools/testdata/x86_64-GOTRewrite/out.s
@@ -124,6 +124,28 @@
 # WAS movq OPENSSL_ia32cap_get@GOTPCREL(%rip), %r11
 	leaq	OPENSSL_ia32cap_get(%rip), %r11
 
+	# Transforming moves run the transform in-place after the load.
+# WAS vpbroadcastq stderr@GOTPCREL(%rip), %xmm0
+	leaq -128(%rsp), %rsp
+	pushq %rax
+	pushf
+	leaq stderr_GOTPCREL_external(%rip), %rax
+	addq (%rax), %rax
+	movq (%rax), %rax
+	popf
+	vmovq %rax, %xmm0
+	popq %rax
+	leaq 128(%rsp), %rsp
+	vpbroadcastq %xmm0, %xmm0
+# WAS vpbroadcastq foo@GOTPCREL(%rip), %xmm0
+	leaq -128(%rsp), %rsp
+	pushq %rax
+	leaq	.Lfoo_local_target(%rip), %rax
+	vmovq %rax, %xmm0
+	popq %rax
+	leaq 128(%rsp), %rsp
+	vpbroadcastq %xmm0, %xmm0
+
 .comm foobar,64,32
 .text
 BORINGSSL_bcm_text_end: