Skip to content

Commit a339563

Browse files
Mike PallBuristan
authored andcommitted
ARM64: Prevent STP fusion for conditional code emitted by TBAR.
Thanks to Peter Cawley. (cherry picked from commit 7cc53f0) Assume we have a trace for the several `setmetatable()` calls to the same table. This trace contains the following IR: | 0011 p64 FREF 0003 tab.meta | ... | 0018 x0 > tab TNEW 0 0 | 0019 tab TBAR 0003 | 0020 tab FSTORE 0011 0018 The expected mcode to be emitted for the last two IRs is the following: | 55626cffb0 ldrb w30, [x19, 8] ; tab->marked | 55626cffb4 tst w30, 0x4 ; Is black? | 55626cffb8 beq 0x626cffd0 ; Skip marking. | 55626cffbc ldr x27, [x20, 128] | 55626cffc0 and w30, w30, 0xfffffffb | 55626cffc4 str x19, [x20, 128] | 55626cffcc strb w30, [x19, 8] ; tab->marked | 55626cffc8 str x27, [x19, 24] ; tab->gclist | 55626cffd0 str x0, [x19, 32] ; tab->metatable But the last 2 instructions are fused into the following `stp`: | 55581dffd0 stp x27, x0, [x19, 48] Hence, the GC propagation frontier back is done partially, since `str x27, [x19, 24]` is not skipped despite TBAR semantics. This leads to the incorrect value in the `gclist` and the segmentation fault during its traversal on GC step. This patch prevents this fusion via switching instruction for `tab->gclist` and `tab->marked` storing. Sergey Kaplun: * added the description and the test for the problem Part of tarantool/tarantool#11278
1 parent 2b0e188 commit a339563

File tree

2 files changed

+81
-1
lines changed

2 files changed

+81
-1
lines changed

src/lj_asm_arm64.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1271,8 +1271,9 @@ static void asm_tbar(ASMState *as, IRIns *ir)
12711271
Reg link = ra_scratch(as, rset_exclude(RSET_GPR, tab));
12721272
Reg mark = RID_TMP;
12731273
MCLabel l_end = emit_label(as);
1274-
emit_lso(as, A64I_STRx, link, tab, (int32_t)offsetof(GCtab, gclist));
12751274
emit_lso(as, A64I_STRB, mark, tab, (int32_t)offsetof(GCtab, marked));
1275+
/* Keep STRx in the middle to avoid LDP/STP fusion with surrounding code. */
1276+
emit_lso(as, A64I_STRx, link, tab, (int32_t)offsetof(GCtab, gclist));
12761277
emit_setgl(as, tab, gc.grayagain);
12771278
emit_dn(as, A64I_ANDw^emit_isk13(~LJ_GC_BLACK, 0), mark, mark);
12781279
emit_getgl(as, link, gc.grayagain);
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
local tap = require('tap')
2+
3+
-- This test demonstrates LuaJIT's incorrect fusing of store
4+
-- instructions separated by the conditional branch on arm64.
5+
-- See also https://github.com/LuaJIT/LuaJIT/issues/1057.
6+
local test = tap.test('lj-1057-arm64-stp-fusing-across-tbar'):skipcond({
7+
['Test requires JIT enabled'] = not jit.status(),
8+
})
9+
10+
test:plan(2)
11+
12+
-- XXX: Simplify the `jit.dump()` output.
13+
local setmetatable = setmetatable
14+
15+
-- The function below generates the following IR:
16+
-- | 0011 p64 FREF 0003 tab.meta
17+
-- | ...
18+
-- | 0018 x0 > tab TNEW #0 #0
19+
-- | 0019 tab TBAR 0003
20+
-- | 0020 tab FSTORE 0011 0018
21+
-- The expected mcode to be emitted for the last two IRs is the
22+
-- following:
23+
-- | 55626cffb0 ldrb w30, [x19, #8] ; tab->marked
24+
-- | 55626cffb4 tst w30, #0x4 ; Is black?
25+
-- | 55626cffb8 beq 0x626cffd0 ; Skip marking.
26+
-- | 55626cffbc ldr x27, [x20, #128]
27+
-- | 55626cffc0 and w30, w30, #0xfffffffb
28+
-- | 55626cffc4 str x19, [x20, #128]
29+
-- | 55626cffcc strb w30, [x19, #8] ; tab->marked
30+
-- | 55626cffc8 str x27, [x19, #24] ; tab->gclist
31+
-- | 55626cffd0 str x0, [x19, #32] ; tab->metatable
32+
--
33+
-- But the last 2 instructions are fused into the following `stp`:
34+
-- | 55581dffd0 stp x27, x0, [x19, #48]
35+
-- Hence, the GC propagation frontier back is done partially,
36+
-- since `str x27, [x19, #24]` is not skipped despite TBAR
37+
-- semantics. This leads to the incorrect value in the `gclist`
38+
-- and the segmentation fault during its traversal on GC step.
39+
local function trace(target_t)
40+
-- Precreate a table for the FLOAD to avoid TNEW in between.
41+
local stack_t = {}
42+
-- Generate FSTORE TBAR pair. The FSTORE will be dropped due to
43+
-- the FSTORE below by DSE.
44+
setmetatable(target_t, {})
45+
-- Generate FSTORE. TBAR will be dropped by CSE.
46+
setmetatable(target_t, stack_t)
47+
end
48+
49+
jit.opt.start('hotloop=1')
50+
51+
-- XXX: Need to trigger the GC on trace to introspect that the
52+
-- GC chain is broken. Use empirical 10000 iterations.
53+
local tab = {}
54+
for _ = 1, 1e4 do
55+
trace(tab)
56+
end
57+
58+
test:ok(true, 'no assertion failure in the simple loop')
59+
60+
-- The similar test, but be sure that we finish the whole GC
61+
-- cycle, plus using upvalue instead of stack slot for the target
62+
-- table.
63+
64+
local target_t = {}
65+
local function trace2()
66+
local stack_t = {}
67+
setmetatable(target_t, {})
68+
setmetatable(target_t, stack_t)
69+
end
70+
71+
collectgarbage('collect')
72+
collectgarbage('setstepmul', 1)
73+
while not collectgarbage('step') do
74+
trace2()
75+
end
76+
77+
test:ok(true, 'no assertion failure in the whole GC cycle')
78+
79+
test:done(true)

0 commit comments

Comments
 (0)