hang-in · yodakrkids · May 28, 2026 · gemini-code-assist · May 28, 2026 · gemini-code-assist
diff --git a/src-tauri/src/agents/openai_compat.rs b/src-tauri/src/agents/openai_compat.rs
@@ -87,6 +87,41 @@ pub fn lmstudio_base_url() -> String {
         .unwrap_or_else(|_| "http://localhost:1234".into())
 }
 
+/// Returns the base URL of the vLLM server.
+///
+/// Reads the `VLLM_ENDPOINT` environment variable. When it is unset, not valid
+/// Unicode, or blank, the default `http://localhost:8000` is returned instead.
+/// Surrounding whitespace is trimmed; a blank value is treated as unset because
+/// it would otherwise yield host-less request URLs such as `/v1/models`.
+pub fn vllm_base_url() -> String {
+    std::env::var("VLLM_ENDPOINT")
+        .ok()
+        .map(|value| value.trim().to_owned())
+        .filter(|value| !value.is_empty())
+        .unwrap_or_else(|| "http://localhost:8000".into())
+}
+
+/// Discovers the models served by vLLM via its OpenAI-compatible
+/// `/v1/models` endpoint.
+///
+/// The server address comes from [`vllm_base_url`]; it may be configured with
+/// or without a trailing `/v1`. A bearer token is attached when `VLLM_API_KEY`
+/// is set to a non-empty value.
+///
+/// Returns `Some(ids)` holding every string `id` found in the response's
+/// `data` array, or `None` when the client cannot be built, the request fails,
+/// the server answers with a non-success status, the body does not have the
+/// expected JSON shape, or no model ids are present.
+///
+/// NOTE(review): this uses reqwest's blocking client — confirm callers never
+/// invoke it directly on an async runtime thread.
+pub fn discover_vllm() -> Option<Vec<String>> {
+    // Normalize the configured endpoint: drop trailing slashes and an optional
+    // `/v1` suffix so the request never targets `/v1/v1/models`.
+    let endpoint = vllm_base_url();
+    let trimmed = endpoint.trim_end_matches('/');
+    let root = trimmed
+        .strip_suffix("/v1")
+        .unwrap_or(trimmed)
+        .trim_end_matches('/');
+    let url = format!("{}/v1/models", root);
+
+    let client = reqwest::blocking::Client::builder()
+        .timeout(std::time::Duration::from_secs(3))
+        .build()
+        .ok()?;
+
+    let mut req = client.get(&url);
+    // An empty key would produce a malformed `Bearer ` header, so skip it.
+    if let Some(token) = std::env::var("VLLM_API_KEY").ok().filter(|t| !t.is_empty()) {
+        req = req.header("Authorization", format!("Bearer {}", token));
+    }
+    // Discovery is best-effort: an unreachable server simply yields `None`.
+    let resp = req.send().ok()?;
+    if !resp.status().is_success() {
+        eprintln!("[openai_compat] vllm {} → {}", url, resp.status());
+        return None;
+    }
+
+    let body: serde_json::Value = resp.json().ok()?;
+    let data = body.get("data")?.as_array()?;
+    // Entries without a string `id` are skipped rather than failing the call.
+    let models: Vec<String> = data.iter()
+        .filter_map(|m| m.get("id").and_then(|v| v.as_str()).map(String::from))
+        .collect();
+
+    if models.is_empty() { None } else { Some(models) }
+}
+
 pub async fn stream_run_with_base<F, G>(
     input: RunInput,
     base: String,
@@ -128,7 +163,13 @@ where
         tools: Some(tools_json),
     };
 
-    let engine_name = if base.contains(":1234") || base.contains("lmstudio") { "LM Studio" } else { "Ollama" };
+    let engine_name = if base.contains(":1234") || base.contains("lmstudio") {
+        "LM Studio"
+    } else if base.contains(":8000") || base.contains("vllm") {
+        "vLLM"
+    } else {
+        "Ollama"
+    };
     on_progress(format!("{} ({}) initializing...", engine_name, model));
 
     let client = Client::builder()
@@ -137,10 +178,14 @@ where
         .map_err(|e| AppError::Agent(format!("HTTP client build failed: {}", e)))?;
 
     let mut req = client.post(&url).json(&body);
-    if let Ok(token) = std::env::var("LMSTUDIO_API_KEY") {
-        if engine_name == "LM Studio" {
-            req = req.header("Authorization", format!("Bearer {}", token));
-        }
+    // Apply API key based on engine
+    let api_key = match engine_name {
+        "LM Studio" => std::env::var("LMSTUDIO_API_KEY").ok(),
+        "vLLM" => std::env::var("VLLM_API_KEY").ok(),
+        _ => None,
+    };
+    if let Some(token) = api_key {
+        req = req.header("Authorization", format!("Bearer {}", token));
     }
     let response = req
         .send()
@@ -203,7 +248,13 @@ where
         tools: None,
     };
 
-    let engine_name = if base.contains(":1234") { "LM Studio" } else { "Ollama" };
+    let engine_name = if base.contains(":1234") || base.contains("lmstudio") {
+        "LM Studio"
+    } else if base.contains(":8000") || base.contains("vllm") {
+        "vLLM"
+    } else {
+        "Ollama"
+    };
     on_progress(format!("{} ({}) running (no tools)...", engine_name, model));
 
     let client = Client::builder()
@@ -212,10 +263,13 @@ where
         .map_err(|e| AppError::Agent(format!("HTTP client build failed: {}", e)))?;
 
     let mut req = client.post(&url).json(&body);
-    if let Ok(token) = std::env::var("LMSTUDIO_API_KEY") {
-        if engine_name == "LM Studio" {
-            req = req.header("Authorization", format!("Bearer {}", token));
-        }
+    let api_key = match engine_name {
+        "LM Studio" => std::env::var("LMSTUDIO_API_KEY").ok(),
+        "vLLM" => std::env::var("VLLM_API_KEY").ok(),
+        _ => None,
+    };
+    if let Some(token) = api_key {
+        req = req.header("Authorization", format!("Bearer {}", token));
     }
     let response = req.send().await
         .map_err(|e| AppError::Agent(format!("OpenAI-compatible API 요청 실패: {}", e)))?;

diff --git a/src-tauri/src/commands/agent_detect.rs b/src-tauri/src/commands/agent_detect.rs
@@ -246,24 +246,82 @@ async fn probe_lmstudio(endpoint: &str) -> AgentDetection {
     det
 }
 
+/// Probes a vLLM server through its OpenAI-compatible `GET /v1/models` route.
+///
+/// `endpoint` may be given with or without a trailing `/v1`; trailing slashes
+/// are ignored. When `VLLM_API_KEY` is set to a non-empty value it is sent as
+/// a bearer token, so a key-protected server is not misreported as missing.
+///
+/// Always returns a detection record for engine `"vllm"`. `installed` is set
+/// and `models` is filled only after a successful, parseable response;
+/// otherwise `note` carries the failure reason (client build error, HTTP
+/// status, parse error, `timeout`, or `not reachable`).
+async fn probe_vllm(endpoint: &str) -> AgentDetection {
+    // Accept the endpoint with or without `/v1`; the models route is requested
+    // under `/v1` either way.
+    let base_raw = endpoint.trim_end_matches('/');
+    let base = if base_raw.ends_with("/v1") { base_raw.to_string() } else { format!("{}/v1", base_raw) };
+    let url = format!("{}/models", base);
+
+    let mut det = AgentDetection {
+        engine: "vllm".into(),
+        kind: "http",
+        installed: false,
+        version: None,
+        path: None,
+        endpoint: Some(base_raw.to_string()),
+        models: vec![],
+        note: None,
+    };
+
+    let client = match reqwest::Client::builder()
+        .timeout(Duration::from_millis(PROBE_TIMEOUT_MS))
+        .build()
+    {
+        Ok(c) => c,
+        Err(e) => { det.note = Some(format!("reqwest build error: {e}")); return det; }
+    };
+
+    // Authenticate the probe with the same `VLLM_API_KEY` variable the vLLM
+    // discovery and streaming requests read; an empty key is ignored.
+    let mut req = client.get(&url);
+    if let Some(token) = std::env::var("VLLM_API_KEY").ok().filter(|t| !t.is_empty()) {
+        req = req.header("Authorization", format!("Bearer {}", token));
+    }
+
+    eprintln!("[agent-detect] probe vllm: GET {}", url);
+    match req.send().await {
+        Ok(resp) if resp.status().is_success() => {
+            match resp.json::<OpenAiModelsResponse>().await {
+                Ok(body) => {
+                    det.installed = true;
+                    det.models = body.data.into_iter().map(|m| m.id).collect();
+                    eprintln!("[agent-detect] vllm ok — {} models", det.models.len());
+                }
+                Err(e) => {
+                    eprintln!("[agent-detect] vllm parse error: {e}");
+                    det.note = Some(format!("응답 파싱 실패: {e}"));
+                }
+            }
+        }
+        Ok(resp) => {
+            // Reachable, but the server refused or failed the request.
+            let status = resp.status();
+            eprintln!("[agent-detect] vllm status {}", status);
+            det.note = Some(format!("HTTP {status}"));
+        }
+        Err(e) => {
+            eprintln!("[agent-detect] vllm unreachable: {e}");
+            det.note = Some(if e.is_timeout() { "timeout".into() } else { "not reachable".into() });
+        }
+    }
+    det
+}
+
 // ─── Tauri command ───────────────────────────────────────────────────────────
 
 #[tauri::command]
 pub async fn detect_available_agents(
     ollama_endpoint: Option<String>,
     lmstudio_endpoint: Option<String>,
+    vllm_endpoint: Option<String>,
 ) -> Vec<AgentDetection> {
     let ollama_ep = ollama_endpoint.unwrap_or_else(|| "http://localhost:11434".into());
     let lmstudio_ep = lmstudio_endpoint.unwrap_or_else(|| "http://localhost:1234/v1".into());
+    let vllm_ep = vllm_endpoint.unwrap_or_else(|| {
+        std::env::var("VLLM_ENDPOINT").unwrap_or_else(|_| "http://localhost:8000".into())
+    });
 
     // CLI probes — 병렬
-    let (claude, codex, gemini, ollama, lmstudio) = tokio::join!(
+    let (claude, codex, gemini, ollama, lmstudio, vllm) = tokio::join!(
         probe_cli("claude", "claude", &["--version"]),
         probe_cli("codex",  "codex",  &["--version"]),
         probe_cli("gemini", "gemini", &["--version"]),
         probe_ollama(&ollama_ep),
         probe_lmstudio(&lmstudio_ep),
+        probe_vllm(&vllm_ep),
     );
 
-    vec![claude, codex, gemini, ollama, lmstudio]
+    vec![claude, codex, gemini, ollama, lmstudio, vllm]
 }
diff --git a/src-tauri/src/commands/agents.rs b/src-tauri/src/commands/agents.rs
@@ -498,9 +498,13 @@ pub async fn start_openai_compat_stream(
 ) -> Result<StartRunResult, AppError> {
     let db = state.inner().clone();
     let db_post = state.inner().clone();
-    let is_lmstudio = input.engine.as_deref() == Some("lmstudio");
-    let engine_label = if is_lmstudio { "lmstudio" } else { "ollama" };
-    eprintln!("[openai-compat] engine={:?} model={:?} is_lmstudio={}", input.engine, input.model, is_lmstudio);
+    let engine_key = input.engine.as_deref().unwrap_or("ollama");
+    let engine_label = match engine_key {
+        "lmstudio" => "lmstudio",
+        "vllm" => "vllm",
+        _ => "ollama",
+    };
+    eprintln!("[openai-compat] engine={:?} model={:?} engine_label={}", input.engine, input.model, engine_label);
     let id_frag = identity_fragment(&input, engine_label);
     let write_arc = db_write_arc(&state);
     let cid = input.conversation_id.clone();
@@ -529,10 +533,10 @@ pub async fn start_openai_compat_stream(
         .filter(|s| !s.is_empty())
         .map(str::to_owned)
         .unwrap_or_else(|| {
-            if is_lmstudio {
-                openai_compat::lmstudio_base_url()
-            } else {
-                std::env::var("OLLAMA_HOST").unwrap_or_else(|_| "http://localhost:11434".into())
+            match engine_label {
+                "lmstudio" => openai_compat::lmstudio_base_url(),
+                "vllm" => openai_compat::vllm_base_url(),
+                _ => std::env::var("OLLAMA_HOST").unwrap_or_else(|_| "http://localhost:11434".into()),
             }
         });
 
@@ -607,7 +611,7 @@ pub fn run_eval_agent(
         "codex" => codex::run(run_input),
         "gemini" => gemini::run(run_input),
         "opencode" => opencode::run(run_input),
-        "ollama" => openai_compat::run(run_input),
+        "ollama" | "vllm" => openai_compat::run(run_input),
         _ => claude::run(run_input),
     };
     let duration_ms = t0.elapsed().as_millis() as i64;

diff --git a/src-tauri/src/commands/model_discovery.rs b/src-tauri/src/commands/model_discovery.rs
@@ -88,6 +88,7 @@ fn fallback_models(engine: &str) -> Vec<(&'static str, &'static str, bool)> {
             ("phi-4:latest", "Phi-4", false),
         ],
         "lmstudio" => vec![],  // LM Studio models are always discovered live
+        "vllm" => vec![],      // vLLM models are always discovered live
         _ => vec![],
     }
 }
@@ -366,7 +367,7 @@ fn discover_lmstudio() -> Option<Vec<String>> {
 
 // ─── Core API ───────────────────────────────────────────────────────────────
 
-const ENGINES: &[&str] = &["claude", "codex", "gemini", "ollama", "lmstudio"];
+const ENGINES: &[&str] = &["claude", "codex", "gemini", "ollama", "lmstudio", "vllm"];
 
 fn get_models_for_engine(engine: &str, force: bool) -> (Vec<String>, String) {
     // Check cache — invalidate early if the tracked binary's mtime has changed
@@ -398,6 +399,7 @@ fn get_models_for_engine(engine: &str, force: bool) -> (Vec<String>, String) {
         },
         "ollama" => (crate::agents::openai_compat::discover_models(), None),
         "lmstudio" => (discover_lmstudio(), None),
+        "vllm" => (crate::agents::openai_compat::discover_vllm(), None),
         _ => (None, None),
     };
 

diff --git a/src-tauri/src/commands/roundtable_helpers/executor.rs b/src-tauri/src/commands/roundtable_helpers/executor.rs
@@ -52,6 +52,7 @@ pub async fn run_participant(
             "gemini" => (gemini::run(run_input), "gemini"),
             "opencode" => (opencode::run(run_input), "opencode"),
             "ollama" => (openai_compat::run(run_input), "ollama"),
+            "vllm" => (openai_compat::run(run_input), "vllm"),
             _ => (
                 Err(AppError::Agent(format!("unsupported engine: {}", engine_key_owned))),
                 "unknown",
@@ -168,7 +169,7 @@ pub(super) async fn stream_participant(
             .await
             .unwrap_or_else(|_| (Err(AppError::Agent("participant task panicked".into())), "unknown"))
         }
-        "ollama" => {
+        "ollama" | "vllm" => {
             let a = app.clone(); let mi = msg_id.clone(); let ci = conversation_id.clone();
             let on_chunk = {
                 let a = a.clone(); let mi = mi.clone(); let ci = ci.clone();
@@ -179,7 +180,13 @@ pub(super) async fn stream_participant(
                 }
             };
             let on_progress = |_: String| {};
-            (openai_compat::stream_run(run_input, on_progress, on_chunk).await, "ollama")
+            let base_url = if engine_key_owned == "vllm" {
+                openai_compat::vllm_base_url()
+            } else {
+                std::env::var("OLLAMA_HOST").unwrap_or_else(|_| "http://localhost:11434".into())
+            };
+            let label = if engine_key_owned == "vllm" { "vllm" } else { "ollama" };
+            (openai_compat::stream_run_with_base(run_input, base_url, on_progress, on_chunk).await, label)
         }
         "opencode" => {
             tokio::task::spawn_blocking(move || {

diff --git a/src/components/tunaflow/AgentAvatar.tsx b/src/components/tunaflow/AgentAvatar.tsx
@@ -11,6 +11,7 @@ const ENGINE_ICONS: Record<string, string> = {
 const ENGINE_INITIALS: Record<string, string> = {
   ollama: "O",
   lmstudio: "L",
+  vllm: "V",
 };
 
 interface AgentAvatarProps {

diff --git a/src/components/tunaflow/CreateRoundtableDialog.tsx b/src/components/tunaflow/CreateRoundtableDialog.tsx
@@ -14,7 +14,7 @@ const RT_MODES: { id: RtMode; label: string; desc: string }[] = [
   { id: "deliberative", label: "Deliberative", desc: "Round 1 independent, Round 2+ reflects on all" },
 ];
 
-const ENGINES = ["claude", "codex", "gemini", "ollama", "lmstudio"] as const;
+const ENGINES = ["claude", "codex", "gemini", "ollama", "lmstudio", "vllm"] as const;
 
 interface CreateRoundtableDialogProps {
   open: boolean;

diff --git a/src/components/tunaflow/MetaAgentSelector.tsx b/src/components/tunaflow/MetaAgentSelector.tsx
@@ -46,6 +46,7 @@ const ENGINE_META: Record<string, EngineMeta> = {
   gemini:   { label: "Gemini",    installHintKey: "", installHint: "npm install -g @google/gemini-cli",        docLink: "https://ai.google.dev/gemini-api/docs/cli" },
   ollama:   { label: "Ollama",    installHintKey: "ollama_install_hint",   defaultEndpoint: "http://localhost:11434" },
   lmstudio: { label: "LM Studio", installHintKey: "lmstudio_install_hint", defaultEndpoint: "http://localhost:1234/v1" },
+  vllm:     { label: "vLLM",      installHintKey: "vllm_install_hint",     defaultEndpoint: "http://localhost:8000" },
 };
 
 // CLI engines whose `models` list comes from the dynamic discovery store
@@ -60,6 +61,7 @@ export function MetaAgentSelector({ onProceed, onSkip, projectName }: Props) {
   const [detections, setDetections] = useState<AgentDetection[] | null>(null);
   const [ollamaEndpoint, setOllamaEndpoint] = useState("http://localhost:11434");
   const [lmstudioEndpoint, setLmstudioEndpoint] = useState("http://localhost:1234/v1");
+  const [vllmEndpoint, setVllmEndpoint] = useState("http://localhost:8000");
 
   const [selectedEngine, setSelectedEngine] = useState<string | null>(null);
   const [modelByEngine, setModelByEngine] = useState<Record<string, string>>({});
@@ -86,12 +88,13 @@ export function MetaAgentSelector({ onProceed, onSkip, projectName }: Props) {
   };
 
   // Initial + on-endpoint-change detection
-  const runDetect = async (oEp: string, lEp: string) => {
+  const runDetect = async (oEp: string, lEp: string, vEp: string) => {
     setDetections(null);
     try {
       const result = await invoke<AgentDetection[]>("detect_available_agents", {
         ollamaEndpoint: oEp,
         lmstudioEndpoint: lEp,
+        vllmEndpoint: vEp,
       });
       setDetections(result);
     } catch (e) {
@@ -107,7 +110,7 @@ export function MetaAgentSelector({ onProceed, onSkip, projectName }: Props) {
     if (engineModels.length === 0) {
       loadEngineModels().catch((e) => console.warn("[meta-agent] loadEngineModels", e));
     }
-    runDetect(ollamaEndpoint, lmstudioEndpoint);
+    runDetect(ollamaEndpoint, lmstudioEndpoint, vllmEndpoint);
     // eslint-disable-next-line react-hooks/exhaustive-deps
   }, []);
 
@@ -132,15 +135,17 @@ export function MetaAgentSelector({ onProceed, onSkip, projectName }: Props) {
     });
   }, [detections, engineModels]);
 
-  const onEndpointChange = (engine: "ollama" | "lmstudio", value: string) => {
+  const onEndpointChange = (engine: "ollama" | "lmstudio" | "vllm", value: string) => {
     if (engine === "ollama") setOllamaEndpoint(value);
-    else setLmstudioEndpoint(value);
+    else if (engine === "lmstudio") setLmstudioEndpoint(value);
+    else setVllmEndpoint(value);
 
     if (debounceRef.current) window.clearTimeout(debounceRef.current);
     debounceRef.current = window.setTimeout(() => {
       const o = engine === "ollama" ? value : ollamaEndpoint;
       const l = engine === "lmstudio" ? value : lmstudioEndpoint;
-      runDetect(o, l);
+      const v = engine === "vllm" ? value : vllmEndpoint;
+      runDetect(o, l, v);
     }, 600);
   };
 
@@ -169,7 +174,11 @@ export function MetaAgentSelector({ onProceed, onSkip, projectName }: Props) {
     if (!canProceed || !selectedEngine) return;
     const det = detections!.find((d) => d.engine === selectedEngine)!;
     const endpoint = det.kind === "http"
-      ? (selectedEngine === "ollama" ? ollamaEndpoint : lmstudioEndpoint)
+      ? (selectedEngine === "ollama"
+          ? ollamaEndpoint
+          : selectedEngine === "lmstudio"
+            ? lmstudioEndpoint
+            : vllmEndpoint)
       : undefined;
     onProceed({
       engine: selectedEngine,
@@ -253,8 +262,8 @@ export function MetaAgentSelector({ onProceed, onSkip, projectName }: Props) {
                       <span className="text-[10px] text-muted-foreground/60 shrink-0">Endpoint</span>
                       <input
                         type="text"
-                        value={d.engine === "ollama" ? ollamaEndpoint : lmstudioEndpoint}
-                        onChange={(e) => onEndpointChange(d.engine as "ollama" | "lmstudio", e.target.value)}
+                        value={d.engine === "ollama" ? ollamaEndpoint : d.engine === "lmstudio" ? lmstudioEndpoint : vllmEndpoint}
+                        onChange={(e) => onEndpointChange(d.engine as "ollama" | "lmstudio" | "vllm", e.target.value)}
                         className="flex-1 text-[10px] font-mono bg-background border border-border/60 rounded px-2 py-1 focus:outline-none focus:border-primary/60"
                       />
                     </div>
@@ -308,6 +317,8 @@ export function MetaAgentSelector({ onProceed, onSkip, projectName }: Props) {
                             ? t("meta_agent.ollama_install_hint")
                             : meta.installHintKey === "lmstudio_install_hint"
                             ? t("meta_agent.lmstudio_install_hint")
+                            : meta.installHintKey === "vllm_install_hint"
+                            ? t("meta_agent.vllm_install_hint")
                             : meta.installHint}
                         </span>
                       </div>