diff --git a/src-tauri/src/agents/openai_compat.rs b/src-tauri/src/agents/openai_compat.rs index 1e926f69..cd1997d0 100644 --- a/src-tauri/src/agents/openai_compat.rs +++ b/src-tauri/src/agents/openai_compat.rs @@ -87,6 +87,41 @@ pub fn lmstudio_base_url() -> String { .unwrap_or_else(|_| "http://localhost:1234".into()) } +/// vLLM base URL (default: localhost:8000) +pub fn vllm_base_url() -> String { + std::env::var("VLLM_ENDPOINT") + .unwrap_or_else(|_| "http://localhost:8000".into()) +} + +/// Discover vLLM models via OpenAI-compatible `/v1/models` endpoint. +pub fn discover_vllm() -> Option> { + let endpoint = vllm_base_url(); + let url = format!("{}/v1/models", endpoint.trim_end_matches('/')); + + let client = reqwest::blocking::Client::builder() + .timeout(std::time::Duration::from_secs(3)) + .build() + .ok()?; + + let mut req = client.get(&url); + if let Ok(token) = std::env::var("VLLM_API_KEY") { + req = req.header("Authorization", format!("Bearer {}", token)); + } + let resp = req.send().ok()?; + if !resp.status().is_success() { + eprintln!("[openai_compat] vllm {} → {}", url, resp.status()); + return None; + } + + let body: serde_json::Value = resp.json().ok()?; + let data = body.get("data")?.as_array()?; + let models: Vec = data.iter() + .filter_map(|m| m.get("id").and_then(|v| v.as_str()).map(String::from)) + .collect(); + + if models.is_empty() { None } else { Some(models) } +} + pub async fn stream_run_with_base( input: RunInput, base: String, @@ -128,7 +163,13 @@ where tools: Some(tools_json), }; - let engine_name = if base.contains(":1234") || base.contains("lmstudio") { "LM Studio" } else { "Ollama" }; + let engine_name = if base.contains(":1234") || base.contains("lmstudio") { + "LM Studio" + } else if base.contains(":8000") || base.contains("vllm") { + "vLLM" + } else { + "Ollama" + }; on_progress(format!("{} ({}) initializing...", engine_name, model)); let client = Client::builder() @@ -137,10 +178,14 @@ where .map_err(|e| AppError::Agent(format!("HTTP client build failed: {}", e)))?; let mut req = client.post(&url).json(&body); - if let Ok(token) = std::env::var("LMSTUDIO_API_KEY") { - if engine_name == "LM Studio" { - req = req.header("Authorization", format!("Bearer {}", token)); - } + // Apply API key based on engine + let api_key = match engine_name { + "LM Studio" => std::env::var("LMSTUDIO_API_KEY").ok(), + "vLLM" => std::env::var("VLLM_API_KEY").ok(), + _ => None, + }; + if let Some(token) = api_key { + req = req.header("Authorization", format!("Bearer {}", token)); } let response = req .send() @@ -203,7 +248,13 @@ where tools: None, }; - let engine_name = if base.contains(":1234") { "LM Studio" } else { "Ollama" }; + let engine_name = if base.contains(":1234") || base.contains("lmstudio") { + "LM Studio" + } else if base.contains(":8000") || base.contains("vllm") { + "vLLM" + } else { + "Ollama" + }; on_progress(format!("{} ({}) running (no tools)...", engine_name, model)); let client = Client::builder() @@ -212,10 +263,13 @@ where .map_err(|e| AppError::Agent(format!("HTTP client build failed: {}", e)))?; let mut req = client.post(&url).json(&body); - if let Ok(token) = std::env::var("LMSTUDIO_API_KEY") { - if engine_name == "LM Studio" { - req = req.header("Authorization", format!("Bearer {}", token)); - } + let api_key = match engine_name { + "LM Studio" => std::env::var("LMSTUDIO_API_KEY").ok(), + "vLLM" => std::env::var("VLLM_API_KEY").ok(), + _ => None, + }; + if let Some(token) = api_key { + req = req.header("Authorization", format!("Bearer {}", token)); } let response = req.send().await .map_err(|e| AppError::Agent(format!("OpenAI-compatible API 요청 실패: {}", e)))?; diff --git a/src-tauri/src/commands/agent_detect.rs b/src-tauri/src/commands/agent_detect.rs index fe3a4b9e..1402a536 100644 --- a/src-tauri/src/commands/agent_detect.rs +++ b/src-tauri/src/commands/agent_detect.rs @@ -246,24 +246,82 @@ async fn probe_lmstudio(endpoint: &str) -> AgentDetection { det } +async fn probe_vllm(endpoint: &str) -> AgentDetection { + // vLLM uses OpenAI-compatible /v1/models endpoint + let base_raw = endpoint.trim_end_matches('/'); + let base = if base_raw.ends_with("/v1") { base_raw.to_string() } else { format!("{}/v1", base_raw) }; + let url = format!("{}/models", base); + + let mut det = AgentDetection { + engine: "vllm".into(), + kind: "http", + installed: false, + version: None, + path: None, + endpoint: Some(base_raw.to_string()), + models: vec![], + note: None, + }; + + let client = match reqwest::Client::builder() + .timeout(Duration::from_millis(PROBE_TIMEOUT_MS)) + .build() + { + Ok(c) => c, + Err(e) => { det.note = Some(format!("reqwest build error: {e}")); return det; } + }; + + eprintln!("[agent-detect] probe vllm: GET {}", url); + match client.get(&url).send().await { + Ok(resp) if resp.status().is_success() => { + match resp.json::().await { + Ok(body) => { + det.installed = true; + det.models = body.data.into_iter().map(|m| m.id).collect(); + eprintln!("[agent-detect] vllm ok — {} models", det.models.len()); + } + Err(e) => { + eprintln!("[agent-detect] vllm parse error: {e}"); + det.note = Some(format!("응답 파싱 실패: {e}")); + } + } + } + Ok(resp) => { + let status = resp.status(); + eprintln!("[agent-detect] vllm status {}", status); + det.note = Some(format!("HTTP {status}")); + } + Err(e) => { + eprintln!("[agent-detect] vllm unreachable: {e}"); + det.note = Some(if e.is_timeout() { "timeout".into() } else { "not reachable".into() }); + } + } + det +} + // ─── Tauri command ─────────────────────────────────────────────────────────── #[tauri::command] pub async fn detect_available_agents( ollama_endpoint: Option, lmstudio_endpoint: Option, + vllm_endpoint: Option, ) -> Vec { let ollama_ep = ollama_endpoint.unwrap_or_else(|| "http://localhost:11434".into()); let lmstudio_ep = lmstudio_endpoint.unwrap_or_else(|| "http://localhost:1234/v1".into()); + let vllm_ep = vllm_endpoint.unwrap_or_else(|| { + std::env::var("VLLM_ENDPOINT").unwrap_or_else(|_| "http://localhost:8000".into()) + }); // CLI probes — 병렬 - let (claude, codex, gemini, ollama, lmstudio) = tokio::join!( + let (claude, codex, gemini, ollama, lmstudio, vllm) = tokio::join!( probe_cli("claude", "claude", &["--version"]), probe_cli("codex", "codex", &["--version"]), probe_cli("gemini", "gemini", &["--version"]), probe_ollama(&ollama_ep), probe_lmstudio(&lmstudio_ep), + probe_vllm(&vllm_ep), ); - vec![claude, codex, gemini, ollama, lmstudio] + vec![claude, codex, gemini, ollama, lmstudio, vllm] } diff --git a/src-tauri/src/commands/agents.rs b/src-tauri/src/commands/agents.rs index e8b8167e..cc94b8f2 100644 --- a/src-tauri/src/commands/agents.rs +++ b/src-tauri/src/commands/agents.rs @@ -498,9 +498,13 @@ pub async fn start_openai_compat_stream( ) -> Result { let db = state.inner().clone(); let db_post = state.inner().clone(); - let is_lmstudio = input.engine.as_deref() == Some("lmstudio"); - let engine_label = if is_lmstudio { "lmstudio" } else { "ollama" }; - eprintln!("[openai-compat] engine={:?} model={:?} is_lmstudio={}", input.engine, input.model, is_lmstudio); + let engine_key = input.engine.as_deref().unwrap_or("ollama"); + let engine_label = match engine_key { + "lmstudio" => "lmstudio", + "vllm" => "vllm", + _ => "ollama", + }; + eprintln!("[openai-compat] engine={:?} model={:?} engine_label={}", input.engine, input.model, engine_label); let id_frag = identity_fragment(&input, engine_label); let write_arc = db_write_arc(&state); let cid = input.conversation_id.clone(); @@ -529,10 +533,10 @@ pub async fn start_openai_compat_stream( .filter(|s| !s.is_empty()) .map(str::to_owned) .unwrap_or_else(|| { - if is_lmstudio { - openai_compat::lmstudio_base_url() - } else { - std::env::var("OLLAMA_HOST").unwrap_or_else(|_| "http://localhost:11434".into()) + match engine_label { + "lmstudio" => openai_compat::lmstudio_base_url(), + "vllm" => openai_compat::vllm_base_url(), + _ => std::env::var("OLLAMA_HOST").unwrap_or_else(|_| "http://localhost:11434".into()), } }); @@ -607,7 +611,7 @@ pub fn run_eval_agent( "codex" => codex::run(run_input), "gemini" => gemini::run(run_input), "opencode" => opencode::run(run_input), - "ollama" => openai_compat::run(run_input), + "ollama" | "vllm" => openai_compat::run(run_input), _ => claude::run(run_input), }; let duration_ms = t0.elapsed().as_millis() as i64; diff --git a/src-tauri/src/commands/model_discovery.rs b/src-tauri/src/commands/model_discovery.rs index f9bc9b3f..49f64eb2 100644 --- a/src-tauri/src/commands/model_discovery.rs +++ b/src-tauri/src/commands/model_discovery.rs @@ -88,6 +88,7 @@ fn fallback_models(engine: &str) -> Vec<(&'static str, &'static str, bool)> { ("phi-4:latest", "Phi-4", false), ], "lmstudio" => vec![], // LM Studio models are always discovered live + "vllm" => vec![], // vLLM models are always discovered live _ => vec![], } } @@ -366,7 +367,7 @@ fn discover_lmstudio() -> Option> { // ─── Core API ─────────────────────────────────────────────────────────────── -const ENGINES: &[&str] = &["claude", "codex", "gemini", "ollama", "lmstudio"]; +const ENGINES: &[&str] = &["claude", "codex", "gemini", "ollama", "lmstudio", "vllm"]; fn get_models_for_engine(engine: &str, force: bool) -> (Vec, String) { // Check cache — invalidate early if the tracked binary's mtime has changed @@ -398,6 +399,7 @@ fn get_models_for_engine(engine: &str, force: bool) -> (Vec, String) { }, "ollama" => (crate::agents::openai_compat::discover_models(), None), "lmstudio" => (discover_lmstudio(), None), + "vllm" => (crate::agents::openai_compat::discover_vllm(), None), _ => (None, None), }; diff --git a/src-tauri/src/commands/roundtable_helpers/executor.rs b/src-tauri/src/commands/roundtable_helpers/executor.rs index bb5d2a8e..da176426 100644 --- a/src-tauri/src/commands/roundtable_helpers/executor.rs +++ b/src-tauri/src/commands/roundtable_helpers/executor.rs @@ -52,6 +52,7 @@ pub async fn run_participant( "gemini" => (gemini::run(run_input), "gemini"), "opencode" => (opencode::run(run_input), "opencode"), "ollama" => (openai_compat::run(run_input), "ollama"), + "vllm" => (openai_compat::run(run_input), "vllm"), _ => ( Err(AppError::Agent(format!("unsupported engine: {}", engine_key_owned))), "unknown", @@ -168,7 +169,7 @@ pub(super) async fn stream_participant( .await .unwrap_or_else(|_| (Err(AppError::Agent("participant task panicked".into())), "unknown")) } - "ollama" => { + "ollama" | "vllm" => { let a = app.clone(); let mi = msg_id.clone(); let ci = conversation_id.clone(); let on_chunk = { let a = a.clone(); let mi = mi.clone(); let ci = ci.clone(); @@ -179,7 +180,8 @@ pub(super) async fn stream_participant( } }; let on_progress = |_: String| {}; - (openai_compat::stream_run(run_input, on_progress, on_chunk).await, "ollama") + let label = if engine_key_owned == "vllm" { "vllm" } else { "ollama" }; + (openai_compat::stream_run(run_input, on_progress, on_chunk).await, label) } "opencode" => { tokio::task::spawn_blocking(move || { diff --git a/src/components/tunaflow/AgentAvatar.tsx b/src/components/tunaflow/AgentAvatar.tsx index 26719acf..1fac4592 100644 --- a/src/components/tunaflow/AgentAvatar.tsx +++ b/src/components/tunaflow/AgentAvatar.tsx @@ -11,6 +11,7 @@ const ENGINE_ICONS: Record = { const ENGINE_INITIALS: Record = { ollama: "O", lmstudio: "L", + vllm: "V", }; interface AgentAvatarProps { diff --git a/src/components/tunaflow/CreateRoundtableDialog.tsx b/src/components/tunaflow/CreateRoundtableDialog.tsx index 4470fc3b..4dffe2bc 100644 --- a/src/components/tunaflow/CreateRoundtableDialog.tsx +++ b/src/components/tunaflow/CreateRoundtableDialog.tsx @@ -14,7 +14,7 @@ const RT_MODES: { id: RtMode; label: string; desc: string }[] = [ { id: "deliberative", label: "Deliberative", desc: "Round 1 independent, Round 2+ reflects on all" }, ]; -const ENGINES = ["claude", "codex", "gemini", "ollama", "lmstudio"] as const; +const ENGINES = ["claude", "codex", "gemini", "ollama", "lmstudio", "vllm"] as const; interface CreateRoundtableDialogProps { open: boolean; diff --git a/src/components/tunaflow/MetaAgentSelector.tsx b/src/components/tunaflow/MetaAgentSelector.tsx index 607222f7..ebbb841c 100644 --- a/src/components/tunaflow/MetaAgentSelector.tsx +++ b/src/components/tunaflow/MetaAgentSelector.tsx @@ -46,6 +46,7 @@ const ENGINE_META: Record = { gemini: { label: "Gemini", installHintKey: "", installHint: "npm install -g @google/gemini-cli", docLink: "https://ai.google.dev/gemini-api/docs/cli" }, ollama: { label: "Ollama", installHintKey: "ollama_install_hint", defaultEndpoint: "http://localhost:11434" }, lmstudio: { label: "LM Studio", installHintKey: "lmstudio_install_hint", defaultEndpoint: "http://localhost:1234/v1" }, + vllm: { label: "vLLM", installHintKey: "vllm_install_hint", defaultEndpoint: "http://localhost:8000" }, }; // CLI engines whose `models` list comes from the dynamic discovery store @@ -60,6 +61,7 @@ export function MetaAgentSelector({ onProceed, onSkip, projectName }: Props) { const [detections, setDetections] = useState(null); const [ollamaEndpoint, setOllamaEndpoint] = useState("http://localhost:11434"); const [lmstudioEndpoint, setLmstudioEndpoint] = useState("http://localhost:1234/v1"); + const [vllmEndpoint, setVllmEndpoint] = useState("http://localhost:8000"); const [selectedEngine, setSelectedEngine] = useState(null); const [modelByEngine, setModelByEngine] = useState>({}); @@ -86,12 +88,13 @@ export function MetaAgentSelector({ onProceed, onSkip, projectName }: Props) { }; // Initial + on-endpoint-change detection - const runDetect = async (oEp: string, lEp: string) => { + const runDetect = async (oEp: string, lEp: string, vEp: string) => { setDetections(null); try { const result = await invoke("detect_available_agents", { ollamaEndpoint: oEp, lmstudioEndpoint: lEp, + vllmEndpoint: vEp, }); setDetections(result); } catch (e) { @@ -107,7 +110,7 @@ export function MetaAgentSelector({ onProceed, onSkip, projectName }: Props) { if (engineModels.length === 0) { loadEngineModels().catch((e) => console.warn("[meta-agent] loadEngineModels", e)); } - runDetect(ollamaEndpoint, lmstudioEndpoint); + runDetect(ollamaEndpoint, lmstudioEndpoint, vllmEndpoint); // eslint-disable-next-line react-hooks/exhaustive-deps }, []); @@ -132,15 +135,17 @@ export function MetaAgentSelector({ onProceed, onSkip, projectName }: Props) { }); }, [detections, engineModels]); - const onEndpointChange = (engine: "ollama" | "lmstudio", value: string) => { + const onEndpointChange = (engine: "ollama" | "lmstudio" | "vllm", value: string) => { if (engine === "ollama") setOllamaEndpoint(value); - else setLmstudioEndpoint(value); + else if (engine === "lmstudio") setLmstudioEndpoint(value); + else setVllmEndpoint(value); if (debounceRef.current) window.clearTimeout(debounceRef.current); debounceRef.current = window.setTimeout(() => { const o = engine === "ollama" ? value : ollamaEndpoint; const l = engine === "lmstudio" ? value : lmstudioEndpoint; - runDetect(o, l); + const v = engine === "vllm" ? value : vllmEndpoint; + runDetect(o, l, v); }, 600); }; @@ -169,7 +174,11 @@ export function MetaAgentSelector({ onProceed, onSkip, projectName }: Props) { if (!canProceed || !selectedEngine) return; const det = detections!.find((d) => d.engine === selectedEngine)!; const endpoint = det.kind === "http" - ? (selectedEngine === "ollama" ? ollamaEndpoint : lmstudioEndpoint) + ? (selectedEngine === "ollama" + ? ollamaEndpoint + : selectedEngine === "lmstudio" + ? lmstudioEndpoint + : vllmEndpoint) : undefined; onProceed({ engine: selectedEngine, @@ -253,8 +262,8 @@ export function MetaAgentSelector({ onProceed, onSkip, projectName }: Props) { Endpoint onEndpointChange(d.engine as "ollama" | "lmstudio", e.target.value)} + value={d.engine === "ollama" ? ollamaEndpoint : d.engine === "lmstudio" ? lmstudioEndpoint : vllmEndpoint} + onChange={(e) => onEndpointChange(d.engine as "ollama" | "lmstudio" | "vllm", e.target.value)} className="flex-1 text-[10px] font-mono bg-background border border-border/60 rounded px-2 py-1 focus:outline-none focus:border-primary/60" /> @@ -308,6 +317,8 @@ export function MetaAgentSelector({ onProceed, onSkip, projectName }: Props) { ? t("meta_agent.ollama_install_hint") : meta.installHintKey === "lmstudio_install_hint" ? t("meta_agent.lmstudio_install_hint") + : meta.installHintKey === "vllm_install_hint" + ? t("meta_agent.vllm_install_hint") : meta.installHint} diff --git a/src/components/tunaflow/input/EngineSelector.tsx b/src/components/tunaflow/input/EngineSelector.tsx index 6bd63930..e0ea6e4b 100644 --- a/src/components/tunaflow/input/EngineSelector.tsx +++ b/src/components/tunaflow/input/EngineSelector.tsx @@ -2,7 +2,7 @@ import { useRef, useEffect, useState } from "react"; import { cn } from "@/lib/utils"; import { ChevronDown } from "lucide-react"; -type Engine = "claude" | "codex" | "gemini" | "ollama" | "lmstudio"; +type Engine = "claude" | "codex" | "gemini" | "ollama" | "lmstudio" | "vllm"; const ENGINE_LIST: { id: Engine; label: string; color: string }[] = [ { id: "claude", label: "Claude", color: "text-agent-claude" }, @@ -10,6 +10,7 @@ const ENGINE_LIST: { id: Engine; label: string; color: string }[] = [ { id: "gemini", label: "Gemini", color: "text-agent-gemini" }, { id: "ollama", label: "Ollama", color: "text-agent-ollama" }, { id: "lmstudio", label: "LM Studio", color: "text-agent-lmstudio" }, + { id: "vllm", label: "vLLM", color: "text-agent-vllm" }, ]; interface EngineSelectorProps { diff --git a/src/components/tunaflow/settings/AgentsSection.tsx b/src/components/tunaflow/settings/AgentsSection.tsx index b84a58e1..56ac5f4d 100644 --- a/src/components/tunaflow/settings/AgentsSection.tsx +++ b/src/components/tunaflow/settings/AgentsSection.tsx @@ -19,13 +19,14 @@ import { // Keep in sync with ENGINE_CONFIGS (src/lib/engineConfig.ts). OpenCode removed; // Ollama + LMStudio share the openai-compatible runtime. -const ENGINES = ["claude", "codex", "gemini", "ollama", "lmstudio"] as const; +const ENGINES = ["claude", "codex", "gemini", "ollama", "lmstudio", "vllm"] as const; const ENGINE_LABELS: Record<(typeof ENGINES)[number], string> = { claude: "Claude", codex: "Codex", gemini: "Gemini", ollama: "Ollama", lmstudio: "LM Studio", + vllm: "vLLM", }; export function AgentsSection() { @@ -75,20 +76,22 @@ export function AgentsSection() { // Issue #175 MVP — Ollama / LM Studio base URL override. // Keyed by engine; appStore key pattern `engineEndpoint:{engine}` mirrors // existing conventions (activeSkills:{pk}, skillDetectionDismissed:{pk}). - const [endpointOverride, setEndpointOverride] = useState>({ + const [endpointOverride, setEndpointOverride] = useState>({ ollama: "", lmstudio: "", + vllm: "", }); useEffect(() => { (async () => { - const [ol, ls] = await Promise.all([ + const [ol, ls, vl] = await Promise.all([ getSetting("engineEndpoint:ollama", ""), getSetting("engineEndpoint:lmstudio", ""), + getSetting("engineEndpoint:vllm", ""), ]); - setEndpointOverride({ ollama: ol, lmstudio: ls }); + setEndpointOverride({ ollama: ol, lmstudio: ls, vllm: vl }); })(); }, []); - const handleEndpointChange = async (engine: "ollama" | "lmstudio", value: string) => { + const handleEndpointChange = async (engine: "ollama" | "lmstudio" | "vllm", value: string) => { const trimmed = value.trim(); setEndpointOverride((prev) => ({ ...prev, [engine]: trimmed })); await setSetting(`engineEndpoint:${engine}`, trimmed); @@ -160,20 +163,22 @@ export function AgentsSection() { - {/* Issue #175 MVP — Ollama / LM Studio base URL override */} - {(selected.engine === "ollama" || selected.engine === "lmstudio") && ( + {/* Issue #175 MVP — Ollama / LM Studio / vLLM base URL override */} + {(selected.engine === "ollama" || selected.engine === "lmstudio" || selected.engine === "vllm") && (
handleEndpointChange(selected.engine as "ollama" | "lmstudio", e.target.value)} + value={endpointOverride[selected.engine as "ollama" | "lmstudio" | "vllm"]} + onChange={(e) => handleEndpointChange(selected.engine as "ollama" | "lmstudio" | "vllm", e.target.value)} placeholder={ selected.engine === "ollama" ? t("agents.endpoint.placeholder_ollama") - : t("agents.endpoint.placeholder_lmstudio") + : selected.engine === "lmstudio" + ? t("agents.endpoint.placeholder_lmstudio") + : "http://localhost:8000" } className="w-full bg-background rounded-lg px-3 py-2 text-tf-caption font-mono outline-none border border-border/30 focus:border-ring/40" /> diff --git a/src/components/tunaflow/settings/RuntimeSection.tsx b/src/components/tunaflow/settings/RuntimeSection.tsx index d3fb3db4..9498937a 100644 --- a/src/components/tunaflow/settings/RuntimeSection.tsx +++ b/src/components/tunaflow/settings/RuntimeSection.tsx @@ -301,6 +301,7 @@ function InsightAgentConfig() { +
diff --git a/src/index.css b/src/index.css index 7ccf2f49..46d63983 100644 --- a/src/index.css +++ b/src/index.css @@ -32,6 +32,7 @@ --agent-opencode: oklch(0.62 0.18 145); --agent-ollama: oklch(0.65 0.15 30); --agent-lmstudio: oklch(0.60 0.16 180); + --agent-vllm: oklch(0.58 0.20 150); --status-draft: oklch(0.65 0.15 55); --status-approved: oklch(0.62 0.18 145); @@ -119,6 +120,7 @@ --color-agent-opencode: var(--agent-opencode); --color-agent-ollama: var(--agent-ollama); --color-agent-lmstudio: var(--agent-lmstudio); + --color-agent-vllm: var(--agent-vllm); --color-status-draft: var(--status-draft); --color-status-approved: var(--status-approved); --color-status-rejected: var(--status-rejected); diff --git a/src/lib/engineConfig.ts b/src/lib/engineConfig.ts index 3082a31d..6b5c09e8 100644 --- a/src/lib/engineConfig.ts +++ b/src/lib/engineConfig.ts @@ -11,4 +11,5 @@ export const ENGINE_CONFIGS: Record = { gemini: { command: "start_gemini_stream", engineKey: "gemini", label: "Gemini initializing...", hasChunkEvent: true }, ollama: { command: "start_openai_compat_stream", engineKey: "ollama", label: "Ollama initializing...", hasChunkEvent: true }, lmstudio: { command: "start_openai_compat_stream", engineKey: "lmstudio", label: "LM Studio initializing...", hasChunkEvent: true }, + vllm: { command: "start_openai_compat_stream", engineKey: "vllm", label: "vLLM initializing...", hasChunkEvent: true }, }; diff --git a/src/lib/initialSetupApply.ts b/src/lib/initialSetupApply.ts index 8965e50d..5f3521c0 100644 --- a/src/lib/initialSetupApply.ts +++ b/src/lib/initialSetupApply.ts @@ -22,7 +22,7 @@ import type { AgentProfile } from "@/types"; const KNOWN_PERSONAS = new Set(DEFAULT_PERSONAS.map((p) => p.id)); /** Engines we recognize. Matches ENGINE_CONFIGS keys. */ -const KNOWN_ENGINES = new Set(["claude", "codex", "gemini", "ollama", "lmstudio", "openai"]); +const KNOWN_ENGINES = new Set(["claude", "codex", "gemini", "ollama", "lmstudio", "vllm", "openai"]); export interface RecommendedProfile { role: string; // "architect" | "developer" | "reviewer" | … diff --git a/src/lib/sendPipeline/buildSendInput.ts b/src/lib/sendPipeline/buildSendInput.ts index 800ef255..e6678622 100644 --- a/src/lib/sendPipeline/buildSendInput.ts +++ b/src/lib/sendPipeline/buildSendInput.ts @@ -49,7 +49,7 @@ export async function buildSendInput(p: BuildSendInputParams): Promise(`engineEndpoint:${p.engine}`, ""); const trimmed = raw.trim(); if (trimmed) customBaseUrl = trimmed; diff --git a/src/locales/en/dialog.json b/src/locales/en/dialog.json index bbe28186..ff99571b 100644 --- a/src/locales/en/dialog.json +++ b/src/locales/en/dialog.json @@ -99,6 +99,7 @@ "skip_overlay_confirm": "Skip and proceed", "ollama_install_hint": "Install from https://ollama.com/download then run `ollama serve`", "lmstudio_install_hint": "Install from https://lmstudio.ai then start the Local Server", + "vllm_install_hint": "Install with `pip install vllm` then run `vllm serve --host 0.0.0.0 --port 8000`", "model_loading": "Loading models...", "model_empty": "No models — install or refresh via Settings → Runtime" }, diff --git a/src/locales/ko/dialog.json b/src/locales/ko/dialog.json index a24b84a3..16e3e20f 100644 --- a/src/locales/ko/dialog.json +++ b/src/locales/ko/dialog.json @@ -99,6 +99,7 @@ "skip_overlay_confirm": "건너뛰고 진행", "ollama_install_hint": "https://ollama.com/download 에서 설치 후 `ollama serve`", "lmstudio_install_hint": "https://lmstudio.ai 에서 설치 후 Local Server 시작", + "vllm_install_hint": "`pip install vllm` 후 `vllm serve --host 0.0.0.0 --port 8000`", "model_loading": "모델 목록 로딩 중...", "model_empty": "모델 없음 — 설치 또는 Settings → Runtime 에서 새로고침" }, diff --git a/src/types/index.ts b/src/types/index.ts index ad3ede8a..756485b1 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -182,7 +182,7 @@ export interface SendWithClaudeInput { userMessageId?: string; prompt: string; model?: string; - /** Engine key for backend routing (e.g. "ollama" vs "lmstudio") */ + /** Engine key for backend routing (e.g. "ollama" / "lmstudio" / "vllm") */ engine?: string; /** Passed directly when no agent is selected */ systemPrompt?: string; @@ -205,16 +205,16 @@ export interface SendWithClaudeInput { /** Absolute paths of image attachments — used by Codex CLI (`-i `). * Other engines read the image via `Read` tool from the prompt path section. */ imagePaths?: string[]; - /** Base URL override for OpenAI-compatible engines (ollama / lmstudio). - * Empty/undefined falls back to env var (OLLAMA_HOST / LMSTUDIO_ENDPOINT) - * then the hardcoded default. Issue #175 MVP. */ + /** Base URL override for OpenAI-compatible engines (ollama / lmstudio / vllm). + * Empty/undefined falls back to env var (OLLAMA_HOST / LMSTUDIO_ENDPOINT / + * VLLM_ENDPOINT) then the hardcoded default. Issue #175 MVP. */ customBaseUrl?: string; } export interface RoundtableParticipant { name: string; model?: string; - /** "claude" | "codex" | "gemini" | "ollama" | "lmstudio" — defaults to "claude" on backend */ + /** "claude" | "codex" | "gemini" | "ollama" | "lmstudio" | "vllm" — defaults to "claude" on backend */ engine?: string; /** Blind verifier — receives topic only, no prior/current transcript */ blind?: boolean;