Skip to content

Commit 577aa32

Browse files
committed
ffi/gc: restore back rehashing of finalizers table
This is a follow-up to the commits 2115828 ("FFI: Drop finalizer table rehash after GC cycle.") and bfcbaa7 ("Drop unused function wrapper."). After them, the rehashing of the cdata finalizer table at the end of the GC cycle is dropped. Without reshashing of this table, the table increases the estimated amount of memory for the GC. Hence, with the bigger `estimate`, the threshold before starting the GC cycle is increased too. This allows allocating more cdata objects and increasing the size of the finalizer table again. This increases the memory estimate again and so on. As a result, we have unlimited memory growth without rehashing of the table for the cdata-intensive workloads. This patch reverts back the code changes (but not the test) of the aforementioned commits. Also, it fixes the possible crash after rehashing of the cdata finalizers table by adding the protected call to the GC steps on the trace itself and on the trace exit. Reviewed-by: Sergey Bronnikov <[email protected]> Signed-off-by: Sergey Kaplun <[email protected]>
1 parent cc96994 commit 577aa32

File tree

6 files changed

+120
-5
lines changed

6 files changed

+120
-5
lines changed

src/lj_gc.c

+29-2
Original file line numberDiff line numberDiff line change
@@ -548,6 +548,7 @@ static void gc_finalize(lua_State *L)
548548
setcdataV(L, &tmp, gco2cd(o));
549549
tv = lj_tab_set(L, tabref(g->gcroot[GCROOT_FFI_FIN]), &tmp);
550550
if (!tvisnil(tv)) {
551+
g->gc.nocdatafin = 0;
551552
copyTV(L, &tmp, tv);
552553
setnilV(tv); /* Clear entry in finalizer table. */
553554
gc_call_finalizer(g, L, &tmp, o);
@@ -693,6 +694,9 @@ static size_t gc_onestep(lua_State *L)
693694
lj_str_resize(L, g->strmask >> 1); /* Shrink string table. */
694695
if (gcref(g->gc.mmudata)) { /* Need any finalizations? */
695696
g->gc.state = GCSfinalize;
697+
#if LJ_HASFFI
698+
g->gc.nocdatafin = 1;
699+
#endif
696700
} else { /* Otherwise skip this phase to help the JIT. */
697701
g->gc.state = GCSpause; /* End of GC cycle. */
698702
g->gc.debt = 0;
@@ -709,6 +713,9 @@ static size_t gc_onestep(lua_State *L)
709713
g->gc.estimate -= GCFINALIZECOST;
710714
return GCFINALIZECOST;
711715
}
716+
#if LJ_HASFFI
717+
if (!g->gc.nocdatafin) lj_tab_rehash(L, tabref(g->gcroot[GCROOT_FFI_FIN]));
718+
#endif
712719
g->gc.state = GCSpause; /* End of GC cycle. */
713720
g->gc.debt = 0;
714721
return 0;
@@ -757,15 +764,35 @@ void LJ_FASTCALL lj_gc_step_fixtop(lua_State *L)
757764
lj_gc_step(L);
758765
}
759766

767+
/* Need to protect lj_gc_step because it may throw. */
768+
static TValue *gc_step_jit_cp(lua_State *L, lua_CFunction dummy, void *ud)
769+
{
770+
MSize steps = (MSize)(uintptr_t)ud;
771+
UNUSED(dummy);
772+
773+
/* Always catch error here and don't call error function. */
774+
cframe_errfunc(L->cframe) = 0;
775+
cframe_nres(L->cframe) = -2*LUAI_MAXSTACK*(int)sizeof(TValue);
776+
777+
while (steps-- > 0 && lj_gc_step(L) == 0)
778+
;
779+
780+
return NULL;
781+
}
782+
760783
#if LJ_HASJIT
761784
/* Perform multiple GC steps. Called from JIT-compiled code. */
762785
int LJ_FASTCALL lj_gc_step_jit(global_State *g, MSize steps)
763786
{
764787
lua_State *L = gco2th(gcref(g->cur_L));
788+
int32_t ostate = g->vmstate;
789+
int errcode;
765790
L->base = tvref(G(L)->jit_base);
766791
L->top = curr_topL(L);
767-
while (steps-- > 0 && lj_gc_step(L) == 0)
768-
;
792+
errcode = lj_vm_cpcall(L, NULL, (void *)(uintptr_t)steps, gc_step_jit_cp);
793+
g->vmstate = ostate;
794+
if (errcode)
795+
lj_err_throw(L, errcode); /* Propagate errors. */
769796
/* Return 1 to force a trace exit. */
770797
return (G(L)->gc.state == GCSatomic || G(L)->gc.state == GCSfinalize);
771798
}

src/lj_obj.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -611,7 +611,7 @@ typedef struct GCState {
611611
GCSize threshold; /* Memory threshold. */
612612
uint8_t currentwhite; /* Current white color. */
613613
uint8_t state; /* GC state. */
614-
uint8_t unused0;
614+
uint8_t nocdatafin; /* No cdata finalizer called. */
615615
#if LJ_64
616616
uint8_t lightudnum; /* Number of lightuserdata segments - 1. */
617617
#else

src/lj_tab.c

+7
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,13 @@ static void rehashtab(lua_State *L, GCtab *t, cTValue *ek)
388388
lj_tab_resize(L, t, asize, hsize2hbits(total));
389389
}
390390

391+
#if LJ_HASFFI
392+
void lj_tab_rehash(lua_State *L, GCtab *t)
393+
{
394+
rehashtab(L, t, niltv(L));
395+
}
396+
#endif
397+
391398
void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize)
392399
{
393400
lj_tab_resize(L, t, nasize+1, t->hmask > 0 ? lj_fls(t->hmask)+1 : 0);

src/lj_tab.h

+3
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,9 @@ LJ_FUNC GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize);
4141
LJ_FUNCA GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt);
4242
LJ_FUNC void LJ_FASTCALL lj_tab_clear(GCtab *t);
4343
LJ_FUNC void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t);
44+
#if LJ_HASFFI
45+
LJ_FUNC void lj_tab_rehash(lua_State *L, GCtab *t);
46+
#endif
4447
LJ_FUNC void lj_tab_resize(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits);
4548
LJ_FUNCA void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize);
4649

src/lj_trace.c

+18-2
Original file line numberDiff line numberDiff line change
@@ -823,6 +823,18 @@ static TValue *trace_exit_cp(lua_State *L, lua_CFunction dummy, void *ud)
823823
return NULL;
824824
}
825825

826+
/* Need to protect lj_gc_step because it may throw. */
827+
static TValue *trace_exit_gc_cp(lua_State *L, lua_CFunction dummy, void *unused)
828+
{
829+
/* Always catch error here and don't call error function. */
830+
cframe_errfunc(L->cframe) = 0;
831+
cframe_nres(L->cframe) = -2*LUAI_MAXSTACK*(int)sizeof(TValue);
832+
lj_gc_step(L);
833+
UNUSED(dummy);
834+
UNUSED(unused);
835+
return NULL;
836+
}
837+
826838
#ifndef LUAJIT_DISABLE_VMEVENT
827839
/* Push all registers from exit state. */
828840
static void trace_exit_regs(lua_State *L, ExitState *ex)
@@ -918,8 +930,12 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
918930
} else if (LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE)) {
919931
/* Just exit to interpreter. */
920932
} else if (G(L)->gc.state == GCSatomic || G(L)->gc.state == GCSfinalize) {
921-
if (!(G(L)->hookmask & HOOK_GC))
922-
lj_gc_step(L); /* Exited because of GC: drive GC forward. */
933+
if (!(G(L)->hookmask & HOOK_GC)) {
934+
/* Exited because of GC: drive GC forward. */
935+
errcode = lj_vm_cpcall(L, NULL, NULL, trace_exit_gc_cp);
936+
if (errcode)
937+
return -errcode; /* Return negated error code. */
938+
}
923939
} else if ((J->flags & JIT_F_ON)) {
924940
trace_hotside(J, pc);
925941
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
local tap = require('tap')
2+
local ffi = require('ffi')
3+
4+
-- Test file to demonstrate too fast memory growth without
5+
-- shrinking the finalizer table for cdata at the end of the GC
6+
-- cycle.
7+
-- See also: https://github.com/LuaJIT/LuaJIT/issues/1350.
8+
9+
local test = tap.test('lj-1350-fix-gc-finalizer-pressure'):skipcond({
10+
-- Tarantool has its own cdata objects. This makes the test
11+
-- unpredictable. Disable it.
12+
['Disable test for Tarantool'] = _TARANTOOL,
13+
['Test requires GC64 mode enabled'] = not ffi.abi('gc64'),
14+
['Disabled with Valgrind (Timeout)'] = os.getenv('LUAJIT_TEST_USE_VALGRIND'),
15+
})
16+
17+
test:plan(1)
18+
19+
-- This stress test shows the memory overconsumption if LuaJIT
20+
-- does not rehash the table with cdata finalizers at the end of
21+
-- the GC cycle. Without reshashing of this table, it increases
22+
-- the estimated amount of memory for the GC. Hence, with the
23+
-- bigger `estimate`, the threshold before starting the GC cycle
24+
-- is increased too. This allows allocating more cdata objects and
25+
-- increasing the size of the finalizer table again. This
26+
-- increases the memory estimate again and so on. As a result, we
27+
-- have unlimited memory growth without rehashing of the table for
28+
-- the cdata-intensive workloads.
29+
30+
local cnt = 0
31+
-- Finalizer function for cdata object.
32+
local function dec() cnt = cnt - 1 end
33+
34+
local function new_obj()
35+
-- Use an array type to make it possible to set a GC finalizer.
36+
local obj = ffi.new('uint64_t [1]')
37+
-- Insert the object into the finalizer table.
38+
ffi.gc(obj, dec)
39+
cnt = cnt + 1
40+
-- The empirical assertion check. Without rehashing, there are
41+
-- more cdata objects than the mentioned limit.
42+
assert(cnt < 10e6, 'too much cdata alive at the moment')
43+
return obj
44+
end
45+
46+
-- Reset the GC.
47+
collectgarbage('collect')
48+
49+
local t = {}
50+
-- Don't use too huge limit to avoid the test run being too long.
51+
-- But it is big enough to trigger the assertion above without
52+
-- cdata finalizer table rehashing at the end of the GC cycle.
53+
for i = 1, 3e7 do
54+
table.insert(t, new_obj())
55+
-- Reset the table. Now cdata objects are collectable.
56+
if i % 3.5e6 == 0 then
57+
t = {}
58+
end
59+
end
60+
61+
test:ok(true, 'not too much cdata alive at the moment')
62+
test:done(true)

0 commit comments

Comments
 (0)