delocate vmovq correctly.

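vmovq clears the upper 128 bits of the YMM register that contains its
destination XMM register, while movq leaves them unchanged. When a vmovq
that targets an XMM register is delocated, the value is first produced in
%rax and then moved into the XMM register; that final move must also be a
vmovq in order to keep this behaviour.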

Change-Id: I81b6eee3ee6db0ea90d7c5098fc7c4ccefaf3b12
Reviewed-on: https://boringssl-review.googlesource.com/20424
Reviewed-by: David Benjamin <davidben@google.com>
Commit-Queue: David Benjamin <davidben@google.com>
CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
diff --git a/util/fipstools/delocate.go b/util/fipstools/delocate.go
index 07a260b..b06e354 100644
--- a/util/fipstools/delocate.go
+++ b/util/fipstools/delocate.go
@@ -859,10 +859,14 @@
 	}
 }
 
-func moveTo(w stringWriter, target string) wrapperFunc {
+func moveTo(w stringWriter, target string, isAVX bool) wrapperFunc {
 	return func(k func()) {
 		k()
-		w.WriteString("\tmovq %rax, " + target + "\n")
+		prefix := ""
+		if isAVX {
+			prefix = "v"
+		}
+		w.WriteString("\t" + prefix + "movq %rax, " + target + "\n")
 	}
 }
 
@@ -1024,7 +1028,8 @@
 					// XMM register, which is not a valid target of an LEA
 					// instruction.
 					wrappers = append(wrappers, saveRegister(d.output))
-					wrappers = append(wrappers, moveTo(d.output, targetReg))
+					isAVX := strings.HasPrefix(instructionName, "v")
+					wrappers = append(wrappers, moveTo(d.output, targetReg, isAVX))
 					targetReg = "%rax"
 					redzoneCleared = true
 				}
diff --git a/util/fipstools/testdata/x86_64-GOTRewrite/out.s b/util/fipstools/testdata/x86_64-GOTRewrite/out.s
index 35fe6c5..d38c5ff 100644
--- a/util/fipstools/testdata/x86_64-GOTRewrite/out.s
+++ b/util/fipstools/testdata/x86_64-GOTRewrite/out.s
@@ -58,14 +58,14 @@
 	addq (%rax), %rax
 	movq (%rax), %rax
 	popf
-	movq %rax, %xmm0
+	vmovq %rax, %xmm0
 	popq %rax
 	leaq 128(%rsp), %rsp
 # WAS vmovq foo@GOTPCREL(%rip), %xmm0
 	leaq -128(%rsp), %rsp
 	pushq %rax
 	leaq	.Lfoo_local_target(%rip), %rax
-	movq %rax, %xmm0
+	vmovq %rax, %xmm0
 	popq %rax
 	leaq 128(%rsp), %rsp