Fix delocate with the aarch64 NO_ASM build The compiler sometimes emits code like this: adrp x10, .Llocal_data2+16 ldr q0, [x10, :lo12:.Llocal_data2+16] We transform it into: adr x10, .Llocal_data2+16 ldr q0, [x10] Note this makes some assumptions about the compiler, which I've documented in comments. We already have a similar assumption on ADRP + ADD pairs, but it is a little more likely for the compiler to do this in the ADRP + LDR case. Hopefully we can get the delocate replacement working soon and make all this moot. Change-Id: Icf4ed701142a52edf38d285c0bc5d52c17032d4f Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/71267 Auto-Submit: David Benjamin <davidben@google.com> Commit-Queue: David Benjamin <davidben@google.com> Reviewed-by: Adam Langley <agl@google.com> Commit-Queue: Adam Langley <agl@google.com>
diff --git a/util/fipstools/delocate/delocate.go b/util/fipstools/delocate/delocate.go index 884344d..64eb0cf 100644 --- a/util/fipstools/delocate/delocate.go +++ b/util/fipstools/delocate/delocate.go
@@ -589,11 +589,15 @@ panic("Symbol reference outside of ldr instruction") } - if skipWS(parts.next) != nil || parts.up.next != nil { - panic("can't handle tweak or post-increment with symbol references") - } - - // Suppress the offset; adrp loaded the full address. + // Suppress the offset; adrp loaded the full address. This assumes the + // compiler does not emit code like the following: + // + // adrp x0, symbol + // ldr x1, [x0, :lo12:symbol] + // ldr x2, [x0, :lo12:symbol+4] + // + // Such code would only work if lo12(symbol+4) = lo12(symbol) + 4, but + // this is true when symbol is sufficiently aligned. args = append(args, "["+baseAddrReg+"]") changed = true continue @@ -610,6 +614,15 @@ // The adrp instruction will have been turned into a sequence that loads // the full address, above, thus the offset is turned into zero. If that // results in the instruction being a nop, then it is deleted. + // + // This assumes the compiler does not emit code like the following: + // + // adrp x0, symbol + // add x1, x0, :lo12:symbol + // add x2, x0, :lo12:symbol+4 + // + // Such code would only work if lo12(symbol+4) = lo12(symbol) + 4, but + // this is true when symbol is sufficiently aligned. if instructionName != "add" { panic(fmt.Sprintf("unsure how to handle %q instruction using lo12", instructionName)) }
diff --git a/util/fipstools/delocate/testdata/aarch64-Basic/in.s b/util/fipstools/delocate/testdata/aarch64-Basic/in.s index f151c23..f93a83f 100644 --- a/util/fipstools/delocate/testdata/aarch64-Basic/in.s +++ b/util/fipstools/delocate/testdata/aarch64-Basic/in.s
@@ -37,6 +37,10 @@ adrp x10, .Llocal_data2 ldr q0, [x10, :lo12:.Llocal_data2] + // Load from local symbol with offset + adrp x10, .Llocal_data2+16 + ldr q0, [x10, :lo12:.Llocal_data2+16] + bl local_function bl remote_function
diff --git a/util/fipstools/delocate/testdata/aarch64-Basic/out.s b/util/fipstools/delocate/testdata/aarch64-Basic/out.s index c024610..4c3ec6d 100644 --- a/util/fipstools/delocate/testdata/aarch64-Basic/out.s +++ b/util/fipstools/delocate/testdata/aarch64-Basic/out.s
@@ -80,6 +80,12 @@ // WAS ldr q0, [x10, :lo12:.Llocal_data2] ldr q0, [x10] + // Load from local symbol with offset +// WAS adrp x10, .Llocal_data2+16 + adr x10, .Llocal_data2+16 +// WAS ldr q0, [x10, :lo12:.Llocal_data2+16] + ldr q0, [x10] + // WAS bl local_function bl .Llocal_function_local_target