-
Notifications
You must be signed in to change notification settings - Fork 12.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[BOLT][AArch64] Exclude JT pattern matching under assert failure #122298
Conversation
@llvm/pr-subscribers-bolt Author: Alexey Moksyakov (yavtuk) ChangesThere are several patterns to implement jump table. Need to exclude the following patterns under assert during disassemble stage:
Full jump table processing for AArch64 is still disabled. Full diff: https://github.com/llvm/llvm-project/pull/122298.diff 3 Files Affected:
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 679c9774c767f7..8b3894ff50b73b 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -213,6 +213,37 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
Inst.getOpcode() == AArch64::ADDXrx64);
}
+ bool isSUB(const MCInst &Inst) const override {
+ const unsigned opcode = Inst.getOpcode();
+ bool isSubInstr = false;
+ switch (opcode) {
+ case AArch64::SUBSWri:
+ case AArch64::SUBSWrr:
+ case AArch64::SUBSWrs:
+ case AArch64::SUBSWrx:
+ case AArch64::SUBSXri:
+ case AArch64::SUBSXrr:
+ case AArch64::SUBSXrs:
+ case AArch64::SUBSXrx:
+ case AArch64::SUBSXrx64:
+ case AArch64::SUBWri:
+ case AArch64::SUBWrr:
+ case AArch64::SUBWrs:
+ case AArch64::SUBWrx:
+ case AArch64::SUBXri:
+ case AArch64::SUBXrr:
+ case AArch64::SUBXrs:
+ case AArch64::SUBXrx:
+ case AArch64::SUBXrx64:
+ isSubInstr = true;
+ break;
+ default:
+ isSubInstr = false;
+ break;
+ }
+ return isSubInstr;
+ }
+
bool isLDRB(const MCInst &Inst) const {
return (Inst.getOpcode() == AArch64::LDRBBpost ||
Inst.getOpcode() == AArch64::LDRBBpre ||
@@ -652,6 +683,34 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
/// # of this BB)
/// br x0 # Indirect jump instruction
///
+ /// adrp + ldr pair instructions of JT
+ /// adrp x3, :got:jump_table
+ /// ldr x1, [x1, #value]
+ /// ldrh w1, [x1, w3, uxtw #1]
+ /// adr x3, 573ae4
+ /// add x1, x3, w1, sxth #2
+ /// br x1
+ ///
+ /// lld/test/ELF/aarch64-adrp-ldr-got.s
+ /// if .rodata and .text are sufficiently (<1M)
+ /// close to each other so that the adrp + ldr pair can be relaxed to
+ /// nop + adr.
+ /// nop #
+ /// adr x0, #6d8f8 # Get JT table address
+ /// ldrh w0, [x0, w4, uxtw #1] # Loads JT entry
+ /// adr x2, 1479b0 # Get PC first instruction for next BB
+ /// add x0, x2, w0, sxth #2 # Finish building branch target
+ /// # (entries in JT are relative to the end
+ /// # of this BB)
+ /// br x0 # Indirect jump instruction
+ ///
+ /// sub + ldr pair instructions of JT, JT address on the stack and other BB
+ /// sub x1, x29, #0x4, lsl #12
+ /// ldr x1, [x1, #14352]
+ /// ldrh w1, [x1, w3, uxtw #1]
+ /// adr x3, 573ae4
+ /// add x1, x3, w1, sxth #2
+ /// br x1
bool analyzeIndirectBranchFragment(
const MCInst &Inst,
DenseMap<const MCInst *, SmallVector<MCInst *, 4>> &UDChain,
@@ -753,45 +812,77 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
// Match ADD that calculates the JumpTable Base Address (not the offset)
SmallVector<MCInst *, 4> &UsesLoad = UDChain[DefLoad];
- const MCInst *DefJTBaseAdd = UsesLoad[1];
+ const MCInst *DefJTPageBias = UsesLoad[1];
MCPhysReg From, To;
- if (DefJTBaseAdd == nullptr || isLoadFromStack(*DefJTBaseAdd) ||
- isRegToRegMove(*DefJTBaseAdd, From, To)) {
+ JumpTable = nullptr;
+ if (DefJTPageBias == nullptr || isLoadFromStack(*DefJTPageBias) ||
+ isRegToRegMove(*DefJTPageBias, From, To)) {
// Sometimes base address may have been defined in another basic block
// (hoisted). Return with no jump table info.
- JumpTable = nullptr;
return true;
}
- if (DefJTBaseAdd->getOpcode() == AArch64::ADR) {
+ if (isAddXri(*DefJTPageBias)) {
+ if (DefJTPageBias->getOperand(2).isImm())
+ Offset = DefJTPageBias->getOperand(2).getImm();
+ SmallVector<MCInst *, 4> &UsesJTBaseAdd = UDChain[DefJTPageBias];
+ const MCInst *DefJTBasePage = UsesJTBaseAdd[1];
+ if (DefJTBasePage == nullptr || isLoadFromStack(*DefJTBasePage)) {
+ return true;
+ }
+ assert(DefJTBasePage->getOpcode() == AArch64::ADRP &&
+ "Failed to match jump table base page pattern! (2)");
+ if (DefJTBasePage->getOperand(1).isExpr())
+ JumpTable = DefJTBasePage->getOperand(1).getExpr();
+ return true;
+ } else if (isADR(*DefJTPageBias)) {
// TODO: Handle the pattern where there is no adrp/add pair.
// It also occurs when the binary is static.
- // adr x13, 0x215a18 <_nl_value_type_LC_COLLATE+0x50>
+ // nop
+ // *adr x13, 0x215a18 <_nl_value_type_LC_COLLATE+0x50>
// ldrh w13, [x13, w12, uxtw #1]
// adr x12, 0x247b30 <__gettextparse+0x5b0>
// add x13, x12, w13, sxth #2
// br x13
- errs() << "BOLT-WARNING: Failed to match indirect branch: "
- "nop/adr instead of adrp/add \n";
- return false;
- }
-
- assert(DefJTBaseAdd->getOpcode() == AArch64::ADDXri &&
- "Failed to match jump table base address pattern! (1)");
+ SmallVector<MCInst *, 4> &UsesJTNop = UDChain[DefJTPageBias];
+ assert((UsesJTNop.size() == 1 && UsesJTNop[0] == nullptr) &&
+ "Failed to match jump table pattern! (2)");
+ if (DefJTPageBias->getOperand(1).isExpr()) {
+ JumpTable = DefJTPageBias->getOperand(1).getExpr();
+ return true;
+ }
+ } else if (mayLoad(*DefJTPageBias)) {
+ if (isLoadFromStack(*DefJTPageBias))
+ return true;
- if (DefJTBaseAdd->getOperand(2).isImm())
- Offset = DefJTBaseAdd->getOperand(2).getImm();
- SmallVector<MCInst *, 4> &UsesJTBaseAdd = UDChain[DefJTBaseAdd];
- const MCInst *DefJTBasePage = UsesJTBaseAdd[1];
- if (DefJTBasePage == nullptr || isLoadFromStack(*DefJTBasePage)) {
- JumpTable = nullptr;
- return true;
+ SmallVector<MCInst *, 4> &UsesJTBase = UDChain[DefJTPageBias];
+ const MCInst *DefJTBasePage = UsesJTBase[1];
+ if (DefJTBasePage == nullptr)
+ return true;
+ if (DefJTBasePage->getOpcode() == AArch64::ADRP) {
+ // test jmp-table-pattern-matching.s (3)
+ if (DefJTBasePage->getOperand(1).isExpr())
+ JumpTable = DefJTBasePage->getOperand(1).getExpr();
+ return true;
+ } else {
+ // Base address may have been defined in another basic block
+ // and sub instruction can be used to get base page address
+ // jmp-table-pattern-matching.s (2)
+ if (isSUB(*DefJTBasePage)) {
+ for (const MCOperand &Operand : useOperands(*DefJTBasePage)) {
+ if (!Operand.isReg())
+ continue;
+ const unsigned Reg = Operand.getReg();
+ if (Reg == AArch64::SP || Reg == AArch64::WSP ||
+ Reg == AArch64::FP || Reg == AArch64::W29)
+ return true;
+ }
+ }
+ }
}
- assert(DefJTBasePage->getOpcode() == AArch64::ADRP &&
- "Failed to match jump table base page pattern! (2)");
- if (DefJTBasePage->getOperand(1).isExpr())
- JumpTable = DefJTBasePage->getOperand(1).getExpr();
- return true;
+
+ assert("Failed to match jump table pattern! (4)");
+ return false;
}
DenseMap<const MCInst *, SmallVector<MCInst *, 4>>
diff --git a/bolt/test/AArch64/jmp-table-pattern-matching.s b/bolt/test/AArch64/jmp-table-pattern-matching.s
new file mode 100644
index 00000000000000..e7cb171bd948c0
--- /dev/null
+++ b/bolt/test/AArch64/jmp-table-pattern-matching.s
@@ -0,0 +1,127 @@
+## This test checks that disassemble stage works properly
+## JT with indirect branch
+## 1) nop + adr pair instructions
+## 2) sub + ldr pair instructions
+## 3) adrp + ldr pair instructions
+
+# REQUIRES: system-linux
+
+# RUN: rm -rf %t && split-file %s %t
+
+## Prepare binary (1)
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %t/jt_nop_adr.s \
+# -o %t/jt_nop_adr.o
+# RUN: %clang %cflags --target=aarch64-unknown-linux %t/jt_nop_adr.o \
+# RUN: -Wl,-q -Wl,-z,now, -Wl,-T,%t/within-adr-range.t -o %t/jt_nop_adr.exe
+# RUN: llvm-objdump --no-show-raw-insn -d %t/jt_nop_adr.exe | FileCheck \
+# --check-prefix=JT-RELAXED %s
+
+# JT-RELAXED: <_start>:
+# JT-RELAXED-NEXT: nop
+# JT-RELAXED-NEXT: adr {{.*}}x3
+
+# RUN: llvm-bolt %t/jt_nop_adr.exe -o %t/jt_nop_adr.bolt 2>&1 | FileCheck %s
+# CHECK-NOT: Failed to match
+
+## This linker script ensures that .rodata and .text are sufficiently (<1M)
+## close to each other so that the adrp + ldr pair can be relaxed to nop + adr.
+#--- within-adr-range.t
+SECTIONS {
+ .rodata 0x1000: { *(.rodata) }
+ .text 0x2000: { *(.text) }
+ .rela.rodata : { *(.rela.rodata) }
+}
+
+## Prepare binary (2)
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %t/jt_sub_ldr.s \
+# -o %t/jt_sub_ldr.o
+# RUN: %clang %cflags --target=aarch64-unknown-linux %t/jt_sub_ldr.o \
+# RUN: -Wl,-q -Wl,-z,now -o %t/jt_sub_ldr.exe
+# RUN: llvm-objdump --no-show-raw-insn -d %t/jt_sub_ldr.exe | FileCheck \
+# --check-prefix=JT-SUB-LDR %s
+
+# JT-SUB-LDR: <_start>:
+# JT-SUB-LDR-NEXT: sub
+# JT-SUB-LDR-NEXT: ldr
+
+# RUN: llvm-bolt %t/jt_sub_ldr.exe -o %t/jt_sub_ldr.bolt 2>&1 | FileCheck \
+# --check-prefix=JT-BOLT-SUBLDR %s
+# JT-BOLT-SUBLDR-NOT: Failed to match
+
+## Prepare binary (3)
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %t/jt_adrp_ldr.s \
+# -o %t/jt_adrp_ldr.o
+# RUN: %clang %cflags --target=aarch64-unknown-linux %t/jt_adrp_ldr.o \
+# RUN: -Wl,-q -Wl,-z,now -Wl,--no-relax -o %t/jt_adrp_ldr.exe
+# RUN: llvm-objdump --no-show-raw-insn -d %t/jt_adrp_ldr.exe | FileCheck \
+# --check-prefix=JT-ADRP-LDR %s
+
+# JT-ADRP-LDR: <_start>:
+# JT-ADRP-LDR-NEXT: adrp
+# JT-ADRP-LDR-NEXT: ldr
+
+# RUN: llvm-bolt %t/jt_adrp_ldr.exe -o %t/jt_adrp_ldr.bolt 2>&1 | FileCheck \
+# --check-prefix=JT-BOLT-ADRP-LDR %s
+# JT-BOLT-ADRP-LDR-NOT: Failed to match
+
+#--- jt_nop_adr.s
+ .globl _start
+ .type _start, %function
+_start:
+ adrp x3, :got:jump_table
+ ldr x3, [x3, #:got_lo12:jump_table]
+ ldrh w3, [x3, x1, lsl #1]
+ adr x1, test2_0
+ add x3, x1, w3, sxth #2
+ br x3
+test2_0:
+ ret
+test2_1:
+ ret
+
+ .section .rodata,"a",@progbits
+jump_table:
+ .hword (test2_0-test2_0)>>2
+ .hword (test2_1-test2_0)>>2
+
+
+#--- jt_sub_ldr.s
+ .globl _start
+ .type _start, %function
+_start:
+ sub x1, x29, #0x4, lsl #12
+ ldr x1, [x1, #14352]
+ ldrh w1, [x1, w3, uxtw #1]
+ adr x3, test2_0
+ add x1, x3, w1, sxth #2
+ br x1
+test2_0:
+ ret
+test2_1:
+ ret
+
+ .section .rodata,"a",@progbits
+jump_table:
+ .hword (test2_0-test2_0)>>2
+ .hword (test2_1-test2_0)>>2
+
+
+#--- jt_adrp_ldr.s
+ .globl _start
+ .type _start, %function
+_start:
+ adrp x3, :got:jump_table
+ ldr x3, [x3, #:got_lo12:jump_table]
+ ldrh w3, [x3, x1, lsl #1]
+ adr x1, test2_0
+ add x3, x1, w3, sxth #2
+ br x3
+test2_0:
+ ret
+test2_1:
+ ret
+
+ .section .rodata,"a",@progbits
+jump_table:
+ .hword (test2_0-test2_0)>>2
+ .hword (test2_1-test2_0)>>2
\ No newline at end of file
diff --git a/bolt/test/AArch64/test-indirect-branch.s b/bolt/test/AArch64/test-indirect-branch.s
index 168e50c8f47f52..1e6044d801f701 100644
--- a/bolt/test/AArch64/test-indirect-branch.s
+++ b/bolt/test/AArch64/test-indirect-branch.s
@@ -56,28 +56,9 @@ test1_1:
test1_2:
ret
-// Pattern 2
-// CHECK: BOLT-WARNING: Failed to match indirect branch: nop/adr instead of adrp/add
- .globl test2
- .type test2, %function
-test2:
- nop
- adr x3, jump_table
- ldrh w3, [x3, x1, lsl #1]
- adr x1, test2_0
- add x3, x1, w3, sxth #2
- br x3
-test2_0:
- ret
-test2_1:
- ret
-
.section .rodata,"a",@progbits
datatable:
.word test1_0-datatable
.word test1_1-datatable
.word test1_2-datatable
-jump_table:
- .hword (test2_0-test2_0)>>2
- .hword (test2_1-test2_0)>>2
|
542a79b
to
038b5f7
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you for adding more jump tables patterns! It looks good, just clarifying comment below. I am not confident I can approve it on my own, we might need more eyes on it.
if (DefJTBasePage == nullptr || isLoadFromStack(*DefJTBasePage)) { | ||
return true; | ||
} | ||
assert(DefJTBasePage->getOpcode() == AArch64::ADRP && |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are we using assert instead of "BOLT-WARNING+return false" to catch more patterns in the wild?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
warning message is preferable here, you're right, thanks.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@ilinpv updated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Assertions was removed by #124372
I collect a set of common jt patterns for different compilers (gcc, clang, golang). I will send new PR when I finish local testing. |
There are several patterns to implement jump table. Need to exclude the following patterns under assert during disassemble stage: (1) nop (2) sub (3) adrp adr ldr ldr ldrh ldrh ldrh adr adr adr add add add br br br Full jump table processing for AArch64 is still disabled.
038b5f7
to
8d06c89
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would be happy to proceed with the patch if there are no objections from code owners.
cool, thanks a lot |
I believe it would be beneficial to enhance the AArch64 jump table pattern matching in Bolt. While we are currently working on ABI (ARM-software/abi-aa#297) and would utilize additional metadata to help Bolt to reconstruct control flow for indirect branches from jump tables in the future, having pattern matching as an option for AArch64 perf2bolt/llvm-bolt in the meantime could be advantageous. Additionally, we can verify these patterns in the future by comparing the results from metadata and pattern matching. Please correct me if I have misunderstood anything. |
@ilinpv definitely we should extend the number of supported patterns for jump table, but above PR #124372 is unified and exclude assert for unknown table right now. |
Right, no question about PR #124372 necessity, and no question about generating in log fails of pattern matching. If I read the code correctly both of these things were already incorporated in your patch. My point is about benefit of extending number of supported patterns by (1) "few extra jump table patterns provided" in this PR and (2) "set of common jt patterns for different compilers (gcc, clang, golang)" you collected. Of course, it can certainly be done in a separate PRs, if that works better for you. |
There are several patterns to implement jump table. Need to exclude the following patterns under assert during disassemble stage:
Full jump table processing for AArch64 is still disabled.
Issue #120782