Fix delocate with the aarch64 NO_ASM build
The compiler sometimes emits code like this:
adrp x10, .Llocal_data2+16
ldr q0, [x10, :lo12:.Llocal_data2+16]
We transform it into:
adr x10, .Llocal_data2+16
ldr q0, [x10]
Note this makes some assumptions about the compiler, which I've documented
in comments. We already have a similar assumption about ADRP + ADD pairs,
but it is a little more likely for the compiler to do this in the ADRP +
LDR case.
Hopefully we can get the delocate replacement working soon and make all
this moot.
Change-Id: Icf4ed701142a52edf38d285c0bc5d52c17032d4f
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/71267
Auto-Submit: David Benjamin <davidben@google.com>
Commit-Queue: David Benjamin <davidben@google.com>
Reviewed-by: Adam Langley <agl@google.com>
Commit-Queue: Adam Langley <agl@google.com>
diff --git a/util/fipstools/delocate/delocate.go b/util/fipstools/delocate/delocate.go
index 884344d..64eb0cf 100644
--- a/util/fipstools/delocate/delocate.go
+++ b/util/fipstools/delocate/delocate.go
@@ -589,11 +589,15 @@
panic("Symbol reference outside of ldr instruction")
}
- if skipWS(parts.next) != nil || parts.up.next != nil {
- panic("can't handle tweak or post-increment with symbol references")
- }
-
- // Suppress the offset; adrp loaded the full address.
+ // Suppress the offset; adrp loaded the full address. This assumes the
+ // compiler does not emit code like the following:
+ //
+ // adrp x0, symbol
+ // ldr x1, [x0, :lo12:symbol]
+ // ldr x2, [x0, :lo12:symbol+4]
+ //
+ // Such code would only work if lo12(symbol+4) = lo12(symbol) + 4, but
+ // this is true when symbol is sufficiently aligned.
args = append(args, "["+baseAddrReg+"]")
changed = true
continue
@@ -610,6 +614,15 @@
// The adrp instruction will have been turned into a sequence that loads
// the full address, above, thus the offset is turned into zero. If that
// results in the instruction being a nop, then it is deleted.
+ //
+ // This assumes the compiler does not emit code like the following:
+ //
+ // adrp x0, symbol
+ // add x1, x0, :lo12:symbol
+ // add x2, x0, :lo12:symbol+4
+ //
+ // Such code would only work if lo12(symbol+4) = lo12(symbol) + 4, but
+ // this is true when symbol is sufficiently aligned.
if instructionName != "add" {
panic(fmt.Sprintf("unsure how to handle %q instruction using lo12", instructionName))
}
diff --git a/util/fipstools/delocate/testdata/aarch64-Basic/in.s b/util/fipstools/delocate/testdata/aarch64-Basic/in.s
index f151c23..f93a83f 100644
--- a/util/fipstools/delocate/testdata/aarch64-Basic/in.s
+++ b/util/fipstools/delocate/testdata/aarch64-Basic/in.s
@@ -37,6 +37,10 @@
adrp x10, .Llocal_data2
ldr q0, [x10, :lo12:.Llocal_data2]
+ // Load from local symbol with offset
+ adrp x10, .Llocal_data2+16
+ ldr q0, [x10, :lo12:.Llocal_data2+16]
+
bl local_function
bl remote_function
diff --git a/util/fipstools/delocate/testdata/aarch64-Basic/out.s b/util/fipstools/delocate/testdata/aarch64-Basic/out.s
index c024610..4c3ec6d 100644
--- a/util/fipstools/delocate/testdata/aarch64-Basic/out.s
+++ b/util/fipstools/delocate/testdata/aarch64-Basic/out.s
@@ -80,6 +80,12 @@
// WAS ldr q0, [x10, :lo12:.Llocal_data2]
ldr q0, [x10]
+ // Load from local symbol with offset
+// WAS adrp x10, .Llocal_data2+16
+ adr x10, .Llocal_data2+16
+// WAS ldr q0, [x10, :lo12:.Llocal_data2+16]
+ ldr q0, [x10]
+
// WAS bl local_function
bl .Llocal_function_local_target