From 9af9d4c9347b8931ae0be689715ea00e9178822a Mon Sep 17 00:00:00 2001 From: Dean De Leo Date: Mon, 25 Aug 2025 23:12:36 +0200 Subject: [PATCH] Record codegen lock waiting time - Add control for recording wait time to Julia mutexes - Enable this control for the codegen lock - Record the codegen lock waiting time in `jl_generate_fptr` when called from `jl_compile_method_internal` - Emit the codegen lock waiting time as well as the codegen lock nesting level in compilation traces --- src/gf.c | 31 ++++++++++++++++++++++++++----- src/init.c | 1 + src/julia.h | 1 + src/julia_threads.h | 1 + src/task.c | 1 + src/threading.c | 4 ++++ 6 files changed, 34 insertions(+), 5 deletions(-) diff --git a/src/gf.c b/src/gf.c index db987e1a2ddf4..2b1d549fe0f80 100644 --- a/src/gf.c +++ b/src/gf.c @@ -2381,7 +2381,7 @@ JL_DLLEXPORT void jl_force_trace_compile_timing_disable(void) jl_atomic_fetch_add(&jl_force_trace_compile_timing_enabled, -1); } -static void record_precompile_statement(jl_method_instance_t *mi, double compilation_time) +static void record_precompile_statement(jl_method_instance_t *mi, double compilation_time, double codegen_lock_waiting_time) { static ios_t f_precompile; static JL_STREAM* s_precompile = NULL; @@ -2392,6 +2392,12 @@ static void record_precompile_statement(jl_method_instance_t *mi, double compila if (!jl_is_method(def)) return; + // nested layer of this precompile statement + int nested_level = 0; + if (jl_atomic_load_relaxed(&(jl_codegen_lock.owner)) == jl_current_task) { + nested_level = jl_codegen_lock.count; + } + JL_LOCK(&precomp_statement_out_lock); if (s_precompile == NULL) { const char *t = jl_options.trace_compile; @@ -2409,7 +2415,20 @@ static void record_precompile_statement(jl_method_instance_t *mi, double compila jl_printf(s_precompile, "#= %6.1f =# ", compilation_time / 1e6); jl_printf(s_precompile, "precompile("); jl_static_show(s_precompile, mi->specTypes); - jl_printf(s_precompile, ")\n"); + jl_printf(s_precompile, ")"); + + // compilation inline properties/files as a json dictionary + if (force_trace_compile || jl_options.trace_compile_timing) { + jl_printf(s_precompile, " #= { "); // json starts + // nested level: 0 -> not nested, >= 1, nested amount + jl_printf(s_precompile, "\"nested_levels\": %d, ", nested_level); + // time spent waiting to acquire the codegen lock, in milliseconds + jl_printf(s_precompile, "\"codegen_waiting_time\": %.2f ", codegen_lock_waiting_time / 1e6); + + jl_printf(s_precompile, "} =#"); // json ends + } + jl_printf(s_precompile, "\n"); + if (s_precompile != JL_STDERR) ios_flush(&f_precompile); } @@ -2569,7 +2588,7 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t codeinst->rettype_const = unspec->rettype_const; jl_atomic_store_release(&codeinst->invoke, unspec_invoke); jl_mi_cache_insert(mi, codeinst); - record_precompile_statement(mi, 0); + record_precompile_statement(mi, 0, 0); return codeinst; } } @@ -2586,7 +2605,7 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t 0, 1, ~(size_t)0, 0, 0, jl_nothing, 0); jl_atomic_store_release(&codeinst->invoke, jl_fptr_interpret_call); jl_mi_cache_insert(mi, codeinst); - record_precompile_statement(mi, 0); + record_precompile_statement(mi, 0, 0); return codeinst; } if (compile_option == JL_OPTIONS_COMPILE_OFF) { @@ -2598,7 +2617,9 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t double compile_time = jl_hrtime(); int did_compile = 0; + uint64_t codegen_lock_wt_before = jl_current_task->lock_waiting_time; codeinst = jl_generate_fptr(mi, world, &did_compile); + uint64_t codegen_lock_waiting_time = jl_current_task->lock_waiting_time - codegen_lock_wt_before; compile_time = jl_hrtime() - compile_time; if (!codeinst) { @@ -2641,7 +2662,7 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t jl_mi_cache_insert(mi, codeinst); } else if (did_compile) { - record_precompile_statement(mi, compile_time); + record_precompile_statement(mi, compile_time, (double)codegen_lock_waiting_time); } jl_atomic_store_relaxed(&codeinst->precompile, 1); return codeinst; diff --git a/src/init.c b/src/init.c index dae5afb375916..0d716a1f08de8 100644 --- a/src/init.c +++ b/src/init.c @@ -735,6 +735,7 @@ static void init_global_mutexes(void) { JL_MUTEX_INIT(&newly_inferred_mutex, "newly_inferred_mutex"); JL_MUTEX_INIT(&global_roots_lock, "global_roots_lock"); JL_MUTEX_INIT(&jl_codegen_lock, "jl_codegen_lock"); + jl_codegen_lock.record_waiting_time = 1; // 0 = no, 1 = yes JL_MUTEX_INIT(&typecache_lock, "typecache_lock"); } diff --git a/src/julia.h b/src/julia.h index dbe4fe4deb7bf..be2148db3d6cd 100644 --- a/src/julia.h +++ b/src/julia.h @@ -2085,6 +2085,7 @@ typedef struct _jl_task_t { size_t bufsz; // actual sizeof stkbuf unsigned int copy_stack:31; // sizeof stack for copybuf unsigned int started:1; + uint64_t lock_waiting_time; } jl_task_t; #define JL_TASK_STATE_RUNNABLE 0 diff --git a/src/julia_threads.h b/src/julia_threads.h index 141098f7dce49..fbfd5ed7d328e 100644 --- a/src/julia_threads.h +++ b/src/julia_threads.h @@ -120,6 +120,7 @@ struct _jl_task_t; typedef struct { _Atomic(struct _jl_task_t*) owner; uint32_t count; + int32_t record_waiting_time; // 0 => no (default), 1 => yes } jl_mutex_t; struct _jl_bt_element_t; diff --git a/src/task.c b/src/task.c index e614d4205e937..2d069c9e25ea0 100644 --- a/src/task.c +++ b/src/task.c @@ -1117,6 +1117,7 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion #ifdef _COMPILER_ASAN_ENABLED_ t->ctx.asan_fake_stack = NULL; #endif + t->lock_waiting_time = 0; return t; } diff --git a/src/threading.c b/src/threading.c index 1478d233e3a38..875b9f6b8aa57 100644 --- a/src/threading.c +++ b/src/threading.c @@ -800,6 +800,7 @@ void _jl_mutex_init(jl_mutex_t *lock, const char *name) JL_NOTSAFEPOINT { jl_atomic_store_relaxed(&lock->owner, (jl_task_t*)NULL); lock->count = 0; + lock->record_waiting_time = 0; jl_profile_lock_init(lock, name); } @@ -818,10 +819,13 @@ void _jl_mutex_wait(jl_task_t *self, jl_mutex_t *lock, int safepoint) return; } JL_TIMING(LOCK_SPIN, LOCK_SPIN); + uint64_t t0 = lock->record_waiting_time ? jl_hrtime() : 0; while (1) { if (owner == NULL && jl_atomic_cmpswap(&lock->owner, &owner, self)) { lock->count = 1; jl_profile_lock_acquired(lock); + if (lock->record_waiting_time) + self->lock_waiting_time += jl_hrtime() - t0; return; } if (safepoint) {