From 65f73b0399487f6187a06acb55e53b79b6b5fa9c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Drvo=C5=A1t=C4=9Bp?=
Date: Tue, 17 Sep 2024 18:52:36 +0200
Subject: [PATCH] Introduce Testitem-local Timer + debug outputs

---
 src/ReTestItems.jl | 20 ++++++++++++++++----
 src/log_capture.jl | 19 +++++++++++++++----
 2 files changed, 31 insertions(+), 8 deletions(-)

diff --git a/src/ReTestItems.jl b/src/ReTestItems.jl
index a6ff0beb..79b56428 100644
--- a/src/ReTestItems.jl
+++ b/src/ReTestItems.jl
@@ -349,7 +349,7 @@ function _runtests_in_current_env(
         run_number = 1
         max_runs = 1 + max(retries, testitem.retries)
         while run_number ≤ max_runs
-            res = runtestitem(testitem, ctx; test_end_expr, verbose_results, logs)
+            res = runtestitem(testitem, ctx; test_end_expr, verbose_results, logs, timeout=something(testitem.timeout, testitem_timeout))
             ts = res.testset
             print_errors_and_captured_logs(testitem, run_number; logs)
             report_empty_testsets(testitem, ts)
@@ -414,7 +414,7 @@ end
 # The provided `worker_num` is only for logging purposes, and not persisted as part of the worker.
 function start_worker(proj_name, nworker_threads, worker_init_expr, ntestitems; worker_num=nothing)
     w = Worker(; threads="$nworker_threads")
-    i = worker_num == nothing ? "" : " $worker_num"
+    i = worker_num === nothing ? "" : " $worker_num"
     # remote_fetch here because we want to make sure the worker is all setup before starting to eval testitems
     remote_fetch(w, quote
         using ReTestItems, Test
@@ -518,7 +518,7 @@ function manage_worker(
         end
         testitem.workerid[] = worker.pid
         timeout = something(testitem.timeout, default_timeout)
-        fut = remote_eval(worker, :(ReTestItems.runtestitem($testitem, GLOBAL_TEST_CONTEXT; test_end_expr=$(QuoteNode(test_end_expr)), verbose_results=$verbose_results, logs=$(QuoteNode(logs)))))
+        fut = remote_eval(worker, :(ReTestItems.runtestitem($testitem, GLOBAL_TEST_CONTEXT; test_end_expr=$(QuoteNode(test_end_expr)), verbose_results=$verbose_results, logs=$(QuoteNode(logs)), timeout=$timeout)))
         max_runs = 1 + max(retries, testitem.retries)
         try
             timer = Timer(timeout) do tm
@@ -919,11 +919,22 @@ end
 # when `runtestitem` called directly or `@testitem` called outside of `runtests`.
 function runtestitem(
     ti::TestItem, ctx::TestContext;
-    test_end_expr::Union{Nothing,Expr}=nothing, logs::Symbol=:eager, verbose_results::Bool=true, finish_test::Bool=true,
+    test_end_expr::Union{Nothing,Expr}=nothing, logs::Symbol=:eager, verbose_results::Bool=true, finish_test::Bool=true, timeout::Int=something(ti.timeout, DEFAULT_TESTITEM_TIMEOUT)
 )
     if should_skip(ti)::Bool
         return skiptestitem(ti, ctx; verbose_results)
     end
+
+    # TODO: How to control this? Should we reuse `timeout_profile_wait`?
+    timer = Timer(timeout - clamp(0.2 * timeout, 0.25, 5.0)) do _
+        if logs !== :eager
+            @loglock _redirect_logs(debugpath(ti)) do
+                ccall(:jl_print_task_backtraces, Cvoid, ())
+            end
+        else
+            ccall(:jl_print_task_backtraces, Cvoid, ())
+        end
+    end
     if test_end_expr === nothing
         has_test_end_expr = false
         test_end_expr = Expr(:block)
@@ -1012,6 +1023,7 @@ function runtestitem(
             (Test.Base).current_exceptions(),
             LineNumberNode(ti.line, ti.file)))
     finally
+        close(timer)
         # Make sure all test setup logs are commited to file
         foreach(ts->isassigned(ts.logstore) && flush(ts.logstore[]), ti.testsetups)
         ts1 = Test.pop_testset()
diff --git a/src/log_capture.jl b/src/log_capture.jl
index 6d36ac9d..2244a0d0 100644
--- a/src/log_capture.jl
+++ b/src/log_capture.jl
@@ -97,6 +97,7 @@ function logfile_name(ts::TestSetup)
 end
 logpath(ti::TestItem, i=nothing) = joinpath(RETESTITEMS_TEMP_FOLDER[], logfile_name(ti, i))
 logpath(ts::TestSetup) = joinpath(RETESTITEMS_TEMP_FOLDER[], logfile_name(ts))
+debugpath(ti::TestItem, i=nothing) = logpath(ti, i) * ".task_backtraces"
 
 """
     _redirect_logs(f, target::Union{IO,String})
@@ -139,6 +140,7 @@ _has_logs(ts::TestSetup) = filesize(logpath(ts)) > 0
 # The path might not exist if a testsetup always throws an error and we don't get to actually
 # evaluate the test item.
 _has_logs(ti::TestItem, i=nothing) = (path = logpath(ti, i); (isfile(path) && filesize(path) > 0))
+_has_debug(ti::TestItem, i=nothing) = (path = debugpath(ti, i); isfile(path) && filesize(path) > 0)
 # Stats to help diagnose OOM issues.
 _mem_watermark() = string(
     # Tracks the peak memory usage of a process / worker
@@ -172,7 +174,7 @@ function print_errors_and_captured_logs(
 )
     ts = ti.testsets[run_number]
     has_errors = any_non_pass(ts)
-    has_logs = _has_logs(ti, run_number) || any(_has_logs, ti.testsetups)
+    has_logs = _has_logs(ti, run_number) || any(_has_logs, ti.testsetups) || _has_debug(ti, run_number)
     if has_errors || logs == :batched
         report_iob = IOContext(IOBuffer(), :color=>Base.get_have_color())
         println(report_iob)
@@ -193,6 +195,7 @@ function print_errors_and_captured_logs(
     end
     # If we have errors, keep the tesitem log file for JUnit report.
     !has_errors && rm(logpath(ti, run_number), force=true)
+    !has_errors && rm(debugpath(ti, run_number), force=true) # TODO: move the debug logs to the log file?
     return nothing
 end
 
@@ -217,14 +220,22 @@ function _print_captured_logs(io, ti::TestItem, run_number::Int)
     for setup in ti.testsetups
         _print_captured_logs(io, setup, ti)
     end
+    has_debug = _has_debug(ti, run_number)
     has_logs = _has_logs(ti, run_number)
-    bold_text = has_logs ? "Captured Logs" : "No Captured Logs"
+    has_printable = has_logs || has_debug
+
+    bold_text = has_printable ? "Captured Logs" : "No Captured Logs"
     printstyled(io, bold_text; bold=true, color=Base.info_color())
     print(io, " for test item $(repr(ti.name)) at ")
     printstyled(io, _file_info(ti); bold=true, color=:default)
     println(io, _on_worker(ti))
-    has_logs && open(logpath(ti, run_number), "r") do logstore
-        write(io, logstore)
+    if has_printable
+        has_logs && open(logpath(ti, run_number), "r") do logstore
+            write(io, logstore)
+        end
+        has_debug && open(debugpath(ti, run_number), "r") do logstore
+            write(io, logstore)
+        end
     end
     return nothing
 end
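Note for reviewers: a minimal standalone sketch (not part of the patch) of the watchdog pattern the new timer in `runtestitem` relies on. The callback fires 20% of the timeout before the deadline (lead time clamped to between 0.25 and 5 seconds), and closing the timer in the `finally` block cancels the dump when the test item finishes in time. The 10-second timeout is an assumed value, and `println` stands in for the `ccall(:jl_print_task_backtraces, Cvoid, ())` output that the patch redirects to `debugpath(ti)` when logs are batched:

    timeout = 10
    lead = clamp(0.2 * timeout, 0.25, 5.0)   # 2 seconds of lead time for a 10s timeout
    watchdog = Timer(timeout - lead) do _
        # In the patch, this is where task backtraces are dumped for diagnosis.
        println("test item still running close to its $(timeout)s timeout")
    end
    try
        sleep(3)          # stand-in for the test item body; finishes before the watchdog fires
    finally
        close(watchdog)   # cancels the pending dump for test items that finish in time
    end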