Update delocate to handle new compiler output.

Delocate failed with some versions of Clang that reference
OPENSSL_ia32cap_P with an orq instruction.

Change-Id: I448d291594f5f147424e6f7014a681c4201b0aee
Reviewed-on: https://boringssl-review.googlesource.com/29764
Reviewed-by: Adam Langley <alangley@gmail.com>
diff --git a/util/fipstools/delocate.go b/util/fipstools/delocate.go
index 827e446..d58e5be 100644
--- a/util/fipstools/delocate.go
+++ b/util/fipstools/delocate.go
@@ -795,6 +795,9 @@
 	instrTransformingMove
 	instrJump
 	instrConditionalMove
+	// instrCombine merges the source and destination in some fashion, for example
+	// a 2-operand bitwise operation.
+	instrCombine
 	instrOther
 )
 
@@ -820,6 +823,11 @@
 			return instrJump
 		}
 
+	case "orq", "andq", "xorq":
+		if len(args) == 2 {
+			return instrCombine
+		}
+
 	case "vpbroadcastq":
 		if len(args) == 2 {
 			return instrTransformingMove
@@ -855,24 +863,41 @@
 	}
 }
 
-func saveRegister(w stringWriter) wrapperFunc {
+func saveFlags(w stringWriter, redzoneCleared bool) wrapperFunc {
 	return func(k func()) {
-		w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone.
-		w.WriteString("\tpushq %rax\n")
+		if !redzoneCleared {
+			w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone.
+			defer w.WriteString("\tleaq 128(%rsp), %rsp\n")
+		}
+		w.WriteString("\tpushfq\n")
 		k()
-		w.WriteString("\tpopq %rax\n")
-		w.WriteString("\tleaq 128(%rsp), %rsp\n")
+		w.WriteString("\tpopfq\n")
 	}
 }
 
-func moveTo(w stringWriter, target string, isAVX bool) wrapperFunc {
+func saveRegister(w stringWriter, avoidReg string) (wrapperFunc, string) {
+	reg := "%rax"
+	if reg == avoidReg {
+		reg = "%rbx"
+	}
+
+	return func(k func()) {
+		w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone.
+		w.WriteString("\tpushq " + reg + "\n")
+		k()
+		w.WriteString("\tpopq " + reg + "\n")
+		w.WriteString("\tleaq 128(%rsp), %rsp\n")
+	}, reg
+}
+
+func moveTo(w stringWriter, target string, isAVX bool, source string) wrapperFunc {
 	return func(k func()) {
 		k()
 		prefix := ""
 		if isAVX {
 			prefix = "v"
 		}
-		w.WriteString("\t" + prefix + "movq %rax, " + target + "\n")
+		w.WriteString("\t" + prefix + "movq " + source + ", " + target + "\n")
 	}
 }
 
@@ -883,6 +908,13 @@
 	}
 }
 
+func combineOp(w stringWriter, instructionName, source, dest string) wrapperFunc {
+	return func(k func()) {
+		k()
+		w.WriteString("\t" + instructionName + " " + source + ", " + dest + "\n")
+	}
+}
+
 func isValidLEATarget(reg string) bool {
 	return !strings.HasPrefix(reg, "%xmm") && !strings.HasPrefix(reg, "%ymm") && !strings.HasPrefix(reg, "%zmm")
 }
@@ -938,16 +970,9 @@
 			symbol, offset, section, didChange, symbolIsLocal, memRef := d.parseMemRef(arg.up)
 			changed = didChange
 
-			if symbol == "OPENSSL_ia32cap_P" {
-				var ok bool
-				if section == "GOTPCREL" {
-					ok = instructionName == "movq"
-				} else if section == "" {
-					ok = instructionName == "leaq"
-				}
-
-				if !ok {
-					return nil, fmt.Errorf("instruction %q referenced OPENSSL_ia32cap_P in section %q, should be a movq from GOTPCREL or a direct leaq", instructionName, section)
+			if symbol == "OPENSSL_ia32cap_P" && section == "" {
+				if instructionName != "leaq" {
+					return nil, fmt.Errorf("non-leaq instruction %q referenced OPENSSL_ia32cap_P directly", instructionName)
 				}
 
 				if i != 0 || len(argNodes) != 2 || !d.isRIPRelative(memRef) || len(offset) > 0 {
@@ -963,13 +988,14 @@
 				}
 
 				changed = true
+
+				// Flag-altering instructions (e.g. addq) are going to be used so the
+				// flags need to be preserved.
+				wrappers = append(wrappers, saveFlags(d.output, false /* Red Zone not yet cleared */))
+
 				wrappers = append(wrappers, func(k func()) {
-					d.output.WriteString("\tleaq\t-128(%rsp), %rsp\n") // Clear the red zone.
-					d.output.WriteString("\tpushfq\n")
 					d.output.WriteString("\tleaq\tOPENSSL_ia32cap_addr_delta(%rip), " + reg + "\n")
 					d.output.WriteString("\taddq\t(" + reg + "), " + reg + "\n")
-					d.output.WriteString("\tpopfq\n")
-					d.output.WriteString("\tleaq\t128(%rsp), %rsp\n")
 				})
 
 				break Args
@@ -1021,6 +1047,7 @@
 
 				// Reduce the instruction to movq symbol@GOTPCREL, targetReg.
 				var targetReg string
+				var redzoneCleared bool
 				switch classifyInstruction(instructionName, argNodes) {
 				case instrPush:
 					wrappers = append(wrappers, push(d.output))
@@ -1038,23 +1065,45 @@
 					if isValidLEATarget(targetReg) {
 						return nil, errors.New("Currently transforming moves are assumed to target XMM registers. Otherwise we'll pop %rax before reading it to do the transform.")
 					}
+				case instrCombine:
+					targetReg = d.contents(argNodes[1])
+					if !isValidLEATarget(targetReg) {
+						return nil, fmt.Errorf("cannot handle combining instructions targeting non-general registers")
+					}
+					saveRegWrapper, tempReg := saveRegister(d.output, targetReg)
+					redzoneCleared = true
+					wrappers = append(wrappers, saveRegWrapper)
+
+					wrappers = append(wrappers, combineOp(d.output, instructionName, tempReg, targetReg))
+					targetReg = tempReg
 				default:
 					return nil, fmt.Errorf("Cannot rewrite GOTPCREL reference for instruction %q", instructionName)
 				}
 
-				var redzoneCleared bool
 				if !isValidLEATarget(targetReg) {
 					// Sometimes the compiler will load from the GOT to an
 					// XMM register, which is not a valid target of an LEA
 					// instruction.
-					wrappers = append(wrappers, saveRegister(d.output))
+					saveRegWrapper, tempReg := saveRegister(d.output, "")
+					wrappers = append(wrappers, saveRegWrapper)
 					isAVX := strings.HasPrefix(instructionName, "v")
-					wrappers = append(wrappers, moveTo(d.output, targetReg, isAVX))
-					targetReg = "%rax"
+					wrappers = append(wrappers, moveTo(d.output, targetReg, isAVX, tempReg))
+					targetReg = tempReg
+					if redzoneCleared {
+						return nil, fmt.Errorf("internal error: Red Zone was already cleared")
+					}
 					redzoneCleared = true
 				}
 
-				if useGOT {
+				if symbol == "OPENSSL_ia32cap_P" {
+					// Flag-altering instructions (e.g. addq) are going to be used so the
+					// flags need to be preserved.
+					wrappers = append(wrappers, saveFlags(d.output, redzoneCleared))
+					wrappers = append(wrappers, func(k func()) {
+						d.output.WriteString("\tleaq\tOPENSSL_ia32cap_addr_delta(%rip), " + targetReg + "\n")
+						d.output.WriteString("\taddq\t(" + targetReg + "), " + targetReg + "\n")
+					})
+				} else if useGOT {
 					wrappers = append(wrappers, d.loadFromGOT(d.output, targetReg, symbol, section, redzoneCleared))
 				} else {
 					wrappers = append(wrappers, func(k func()) {
@@ -1239,8 +1288,8 @@
 	}
 
 	w.WriteString(".text\n")
-	w.WriteString(fmt.Sprintf(".file %d \"inserted_by_delocate.c\"\n", maxObservedFileNumber + 1))
-	w.WriteString(fmt.Sprintf(".loc %d 1 0\n", maxObservedFileNumber + 1))
+	w.WriteString(fmt.Sprintf(".file %d \"inserted_by_delocate.c\"\n", maxObservedFileNumber+1))
+	w.WriteString(fmt.Sprintf(".loc %d 1 0\n", maxObservedFileNumber+1))
 	w.WriteString("BORINGSSL_bcm_text_start:\n")
 
 	for _, input := range inputs {
@@ -1250,7 +1299,7 @@
 	}
 
 	w.WriteString(".text\n")
-	w.WriteString(fmt.Sprintf(".loc %d 2 0\n", maxObservedFileNumber + 1))
+	w.WriteString(fmt.Sprintf(".loc %d 2 0\n", maxObservedFileNumber+1))
 	w.WriteString("BORINGSSL_bcm_text_end:\n")
 
 	// Emit redirector functions. Each is a single jump instruction.
diff --git a/util/fipstools/testdata/x86_64-GOTRewrite/in.s b/util/fipstools/testdata/x86_64-GOTRewrite/in.s
index 0f9c70e..ccbc0bf 100644
--- a/util/fipstools/testdata/x86_64-GOTRewrite/in.s
+++ b/util/fipstools/testdata/x86_64-GOTRewrite/in.s
@@ -6,6 +6,12 @@
 	# As is the equivalent GOTPCREL movq.
 	movq OPENSSL_ia32cap_P@GOTPCREL(%rip), %r12
 
+	# And a non-movq instruction via the GOT.
+	orq OPENSSL_ia32cap_P@GOTPCREL(%rip), %r12
+
+	# ... which targets the default temp register
+	orq OPENSSL_ia32cap_P@GOTPCREL(%rip), %rax
+
 	# Test that GOTPCREL accesses get translated. They are handled
 	# differently for local and external symbols.
 
diff --git a/util/fipstools/testdata/x86_64-GOTRewrite/out.s b/util/fipstools/testdata/x86_64-GOTRewrite/out.s
index 0485c87..3d421e5 100644
--- a/util/fipstools/testdata/x86_64-GOTRewrite/out.s
+++ b/util/fipstools/testdata/x86_64-GOTRewrite/out.s
@@ -7,21 +7,45 @@
 foo:
 	# leaq of OPENSSL_ia32cap_P is supported.
 # WAS leaq OPENSSL_ia32cap_P(%rip), %r11
-	leaq	-128(%rsp), %rsp
+	leaq -128(%rsp), %rsp
 	pushfq
 	leaq	OPENSSL_ia32cap_addr_delta(%rip), %r11
 	addq	(%r11), %r11
 	popfq
-	leaq	128(%rsp), %rsp
+	leaq 128(%rsp), %rsp
 
 	# As is the equivalent GOTPCREL movq.
 # WAS movq OPENSSL_ia32cap_P@GOTPCREL(%rip), %r12
-	leaq	-128(%rsp), %rsp
+	leaq -128(%rsp), %rsp
 	pushfq
 	leaq	OPENSSL_ia32cap_addr_delta(%rip), %r12
 	addq	(%r12), %r12
 	popfq
-	leaq	128(%rsp), %rsp
+	leaq 128(%rsp), %rsp
+
+	# And a non-movq instruction via the GOT.
+# WAS orq OPENSSL_ia32cap_P@GOTPCREL(%rip), %r12
+	leaq -128(%rsp), %rsp
+	pushq %rax
+	pushfq
+	leaq	OPENSSL_ia32cap_addr_delta(%rip), %rax
+	addq	(%rax), %rax
+	popfq
+	orq %rax, %r12
+	popq %rax
+	leaq 128(%rsp), %rsp
+
+	# ... which targets the default temp register
+# WAS orq OPENSSL_ia32cap_P@GOTPCREL(%rip), %rax
+	leaq -128(%rsp), %rsp
+	pushq %rbx
+	pushfq
+	leaq	OPENSSL_ia32cap_addr_delta(%rip), %rbx
+	addq	(%rbx), %rbx
+	popfq
+	orq %rbx, %rax
+	popq %rbx
+	leaq 128(%rsp), %rsp
 
 	# Test that GOTPCREL accesses get translated. They are handled
 	# differently for local and external symbols.