diff --git a/api/api.py b/api/api.py index 5a9b56b6c..f7a2af5b6 100644 --- a/api/api.py +++ b/api/api.py @@ -5,6 +5,7 @@ import os import argparse import asyncio +import time import json import signal @@ -432,6 +433,26 @@ async def server_worker_start( with storage.open(get_global_log_path(), "a") as global_log: global_log.write(f"Model loaded successfully: {model_name}\n") + + try: + job = job_get(job_id) + experiment_id = job.get("experiment_id") + await job_update_status(job_id=job_id, status="RUNNING", experiment_id=experiment_id) + except Exception: + # best effort only + pass + try: + from lab import Job # noqa: used for manipulating job_data directly + + j = Job.get(job_id) + jd = j.get_job_data() or {} + tail = jd.get("tail", []) + tail.append(f"{time.strftime('%Y-%m-%d %H:%M:%S')} | INFO | launcher | Process started, waiting for readiness") + jd["tail"] = tail + j.set_job_data(jd) + except Exception: + pass + return {"status": "success", "job_id": job_id} diff --git a/api/transformerlab/shared/shared.py b/api/transformerlab/shared/shared.py index 36a98b0e9..72ef4affd 100644 --- a/api/transformerlab/shared/shared.py +++ b/api/transformerlab/shared/shared.py @@ -315,8 +315,18 @@ async def async_run_python_daemon_and_update_status( with storage.open(pid_file, "w") as f: f.write(str(pid)) + # Mark job as RUNNING immediately after process starts so frontend sees progress + try: + job = job_service.job_get(job_id) + experiment_id = job.get("experiment_id") + await job_update_status(job_id=job_id, status="RUNNING", experiment_id=experiment_id) + except Exception: + # best effort only + pass + # keep a tail of recent lines so we can show them on failure: - recent_lines = deque(maxlen=10) + recent_lines = deque(maxlen=50) + last_update_time = 0.0 line = await process.stdout.readline() error_msg = None @@ -324,6 +334,20 @@ async def async_run_python_daemon_and_update_status( decoded = line.decode() recent_lines.append(decoded.strip()) + try: + now = time.time() + if now - last_update_time >= 1.0: + try: + job = Job.get(job_id) + job_data = job.get_job_data() or {} + job_data["tail"] = list(recent_lines) + job.set_job_data(job_data) + except Exception: + # Best-effort only; don't fail the loop + pass + last_update_time = now + except Exception: + pass # If we hit the begin_string then the daemon is started and we can return! if begin_string in decoded: if set_process_id_function is not None: @@ -332,7 +356,8 @@ async def async_run_python_daemon_and_update_status( print(f"Worker job {job_id} started successfully") job = job_service.job_get(job_id) experiment_id = job["experiment_id"] - await job_update_status(job_id=job_id, status="COMPLETE", experiment_id=experiment_id) + + await job_update_status(job_id=job_id, status="RUNNING", experiment_id=experiment_id) # Schedule the read_process_output coroutine in the current event # so we can keep watching this process, but return back to the caller diff --git a/src/renderer/components/Experiment/Foundation/RunModelButton.tsx b/src/renderer/components/Experiment/Foundation/RunModelButton.tsx index ada7c8072..a90a3e9b1 100644 --- a/src/renderer/components/Experiment/Foundation/RunModelButton.tsx +++ b/src/renderer/components/Experiment/Foundation/RunModelButton.tsx @@ -17,6 +17,8 @@ import { } from 'lucide-react'; import { RiImageAiLine } from 'react-icons/ri'; import { useEffect, useState } from 'react'; +import useSWR from 'swr'; +import { fetcher } from 'renderer/lib/transformerlab-api-sdk'; import { activateWorker, @@ -41,6 +43,11 @@ function removeServerFromEndOfString(str) { } } +function stripAnsiCodes(text) { + if (!text) return ''; + return text.replace(/\u001b\[[0-9;]*m/g, ''); +} + export default function RunModelButton({ experimentInfo, killWorker, @@ -49,6 +56,78 @@ export default function RunModelButton({ setLogsDrawerOpen = null, }) { const [jobId, setJobId] = useState(null); + const apiUrl = chatAPI.API_URL(); + + const jobsListKey = + experimentInfo?.id && apiUrl + ? chatAPI.Endpoints.Jobs.GetJobsOfType( + experimentInfo.id, + 'LOAD_MODEL', + '', + ) + : null; + const { data: loadModelJobs } = useSWR(jobsListKey, fetcher, { + refreshInterval: 3000, + fallbackData: [], + }); + + const jobIdForLogs = + jobId && jobId !== -1 + ? jobId + : Array.isArray(loadModelJobs) && loadModelJobs.length > 0 + ? loadModelJobs[0]?.id + : null; + + // Read tail from job data (prefer jobIdForLogs if available) + const { data: jobLogsData } = useSWR( + jobIdForLogs && experimentInfo?.id + ? chatAPI.Endpoints.Jobs.Get(experimentInfo.id, String(jobIdForLogs)) + : null, + fetcher, + { refreshInterval: 2000 }, + ); + + const [jobLogFull, setJobLogFull] = useState(null); + const [jobLogLine, setJobLogLine] = useState(null); + + useEffect(() => { + const tail = jobLogsData?.job_data?.tail; + if (!tail || !Array.isArray(tail) || tail.length === 0) { + setJobLogFull(null); + setJobLogLine(null); + return; + } + + const lines = tail.slice().filter(Boolean); + let candidate = lines + .slice() + .reverse() + .find((l: string) => { + if (!l || !l.trim()) return false; + // skip very noisy argument dumps + if (/args:\s*Namespace\(/i.test(l)) return false; + return true; + }); + + // fallback to the absolute last non-empty line if all were filtered + if (!candidate) { + candidate = lines + .slice() + .reverse() + .find((l: string) => l && l.trim()); + } + + const full = stripAnsiCodes(candidate || ''); + setJobLogFull(full); + + const MAX_LEN = 140; + if (full.length > MAX_LEN) { + const truncated = full.slice(0, MAX_LEN - 1).trimEnd() + '…'; + setJobLogLine(truncated); + } else { + setJobLogLine(full || null); + } + }, [jobLogsData]); const [stopping, setStopping] = useState(false); const [showRunSettings, setShowRunSettings] = useState(false); const [inferenceSettings, setInferenceSettings] = useState({ @@ -293,67 +372,167 @@ export default function RunModelButton({ checkValidDiffusion(); }, [experimentInfo?.config?.foundation]); + // Add a SWR poll for the job status so we can show load progress + const { data: jobStatusData } = useSWR( + jobId && jobId !== -1 && experimentInfo?.id + ? chatAPI.Endpoints.Jobs.Get(experimentInfo.id, jobId) + : null, + fetcher, + { refreshInterval: 2000 }, + ); + function Engine() { return ( <> {models === null ? ( <> - + const job_id = response?.job_id; + setJobId(job_id); + mutate(); + }} + disabled={!isPossibleToRunAModel()} + > + {isPossibleToRunAModel() ? 'Run' : 'No Available Engine'} + + + {jobId != null && + // Show logs if the job_data.tail has entries, OR while job is still in-flight. + ((jobLogsData?.job_data?.tail && + jobLogsData.job_data.tail.length > 0) || + !jobStatusData || + !['SUCCESS', 'COMPLETE', 'FAILED', 'STOPPED'].includes( + jobStatusData?.status, + )) && ( + +
+ {jobLogsData?.job_data?.tail && + jobLogsData.job_data.tail.length > 0 ? ( + + {jobLogLine || 'Starting model...'} + + ) : jobStatusData?.job_data?.message ? ( + + {jobStatusData.job_data.message} + + ) : ( + + {jobStatusData?.status || 'Starting model...'} + + )} + {jobStatusData?.job_data?.progress != null && ( + + Progress: {jobStatusData.job_data.progress}% + + )} +
+
+ )} + ) : (