Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 64 additions & 10 deletions src-tauri/src/agents/openai_compat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,41 @@ pub fn lmstudio_base_url() -> String {
.unwrap_or_else(|_| "http://localhost:1234".into())
}

/// Returns the base URL used to reach a vLLM server.
///
/// The value of the `VLLM_ENDPOINT` environment variable is returned verbatim
/// when it can be read; otherwise the default `http://localhost:8000` is used.
pub fn vllm_base_url() -> String {
    match std::env::var("VLLM_ENDPOINT") {
        Ok(configured) => configured,
        Err(_) => String::from("http://localhost:8000"),
    }
}

/// Discover vLLM models via the OpenAI-compatible `/v1/models` endpoint.
///
/// The base URL comes from [`vllm_base_url`]. Trailing slashes are ignored and
/// a base that already ends in `/v1` is used as-is, so both
/// `http://host:8000` and `http://host:8000/v1` resolve to
/// `http://host:8000/v1/models`.
///
/// When the `VLLM_API_KEY` environment variable is set, it is sent as a
/// `Bearer` token in the `Authorization` header.
///
/// Returns `None` when the HTTP client cannot be built, the request fails,
/// the server replies with a non-success status, the body is not JSON
/// containing a `data` array, or no entry in that array has a string `id`.
pub fn discover_vllm() -> Option<Vec<String>> {
    let endpoint = vllm_base_url();
    let root = endpoint.trim_end_matches('/');
    // Avoid building `/v1/v1/models` when the configured endpoint already
    // carries the API version prefix.
    let url = if root.ends_with("/v1") {
        format!("{}/models", root)
    } else {
        format!("{}/v1/models", root)
    };

    // Short timeout: discovery must not stall when no server is listening.
    let client = reqwest::blocking::Client::builder()
        .timeout(std::time::Duration::from_secs(3))
        .build()
        .ok()?;

    let mut req = client.get(&url);
    if let Ok(token) = std::env::var("VLLM_API_KEY") {
        req = req.header("Authorization", format!("Bearer {}", token));
    }

    let resp = req.send().ok()?;
    if !resp.status().is_success() {
        eprintln!("[openai_compat] vllm {} → {}", url, resp.status());
        return None;
    }

    let body: serde_json::Value = resp.json().ok()?;
    let models: Vec<String> = body
        .get("data")?
        .as_array()?
        .iter()
        .filter_map(|m| m.get("id").and_then(|v| v.as_str()).map(String::from))
        .collect();

    if models.is_empty() {
        None
    } else {
        Some(models)
    }
}

pub async fn stream_run_with_base<F, G>(
input: RunInput,
base: String,
Expand Down Expand Up @@ -128,7 +163,13 @@ where
tools: Some(tools_json),
};

let engine_name = if base.contains(":1234") || base.contains("lmstudio") { "LM Studio" } else { "Ollama" };
let engine_name = if base.contains(":1234") || base.contains("lmstudio") {
"LM Studio"
} else if base.contains(":8000") || base.contains("vllm") {
"vLLM"
} else {
"Ollama"
};
on_progress(format!("{} ({}) initializing...", engine_name, model));

let client = Client::builder()
Expand All @@ -137,10 +178,14 @@ where
.map_err(|e| AppError::Agent(format!("HTTP client build failed: {}", e)))?;

let mut req = client.post(&url).json(&body);
if let Ok(token) = std::env::var("LMSTUDIO_API_KEY") {
if engine_name == "LM Studio" {
req = req.header("Authorization", format!("Bearer {}", token));
}
// Apply API key based on engine
let api_key = match engine_name {
"LM Studio" => std::env::var("LMSTUDIO_API_KEY").ok(),
"vLLM" => std::env::var("VLLM_API_KEY").ok(),
_ => None,
};
if let Some(token) = api_key {
req = req.header("Authorization", format!("Bearer {}", token));
}
let response = req
.send()
Expand Down Expand Up @@ -203,7 +248,13 @@ where
tools: None,
};

let engine_name = if base.contains(":1234") { "LM Studio" } else { "Ollama" };
let engine_name = if base.contains(":1234") || base.contains("lmstudio") {
"LM Studio"
} else if base.contains(":8000") || base.contains("vllm") {
"vLLM"
} else {
"Ollama"
};
on_progress(format!("{} ({}) running (no tools)...", engine_name, model));

let client = Client::builder()
Expand All @@ -212,10 +263,13 @@ where
.map_err(|e| AppError::Agent(format!("HTTP client build failed: {}", e)))?;

let mut req = client.post(&url).json(&body);
if let Ok(token) = std::env::var("LMSTUDIO_API_KEY") {
if engine_name == "LM Studio" {
req = req.header("Authorization", format!("Bearer {}", token));
}
let api_key = match engine_name {
"LM Studio" => std::env::var("LMSTUDIO_API_KEY").ok(),
"vLLM" => std::env::var("VLLM_API_KEY").ok(),
_ => None,
};
if let Some(token) = api_key {
req = req.header("Authorization", format!("Bearer {}", token));
}
let response = req.send().await
.map_err(|e| AppError::Agent(format!("OpenAI-compatible API 요청 실패: {}", e)))?;
Expand Down
62 changes: 60 additions & 2 deletions src-tauri/src/commands/agent_detect.rs
Original file line number Diff line number Diff line change
Expand Up @@ -246,24 +246,82 @@ async fn probe_lmstudio(endpoint: &str) -> AgentDetection {
det
}

async fn probe_vllm(endpoint: &str) -> AgentDetection {
// vLLM uses OpenAI-compatible /v1/models endpoint
let base_raw = endpoint.trim_end_matches('/');
let base = if base_raw.ends_with("/v1") { base_raw.to_string() } else { format!("{}/v1", base_raw) };
let url = format!("{}/models", base);

let mut det = AgentDetection {
engine: "vllm".into(),
kind: "http",
installed: false,
version: None,
path: None,
endpoint: Some(base_raw.to_string()),
models: vec![],
note: None,
};

let client = match reqwest::Client::builder()
.timeout(Duration::from_millis(PROBE_TIMEOUT_MS))
.build()
{
Ok(c) => c,
Err(e) => { det.note = Some(format!("reqwest build error: {e}")); return det; }
};

eprintln!("[agent-detect] probe vllm: GET {}", url);
match client.get(&url).send().await {
Comment on lines +274 to +275
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The `probe_vllm` function does not send an `Authorization` header with `VLLM_API_KEY` on the probe request. If the vLLM instance requires authentication (which is common for shared or cloud-hosted instances), the detection probe will fail with `401 Unauthorized` even when the key is configured in the environment. Adding the `Authorization` header ensures that authenticated vLLM instances are correctly detected during onboarding.

    eprintln!("[agent-detect] probe vllm: GET {}", url);
    let mut req = client.get(&url);
    if let Ok(token) = std::env::var("VLLM_API_KEY") {
        req = req.header("Authorization", format!("Bearer {}", token));
    }
    match req.send().await {

Ok(resp) if resp.status().is_success() => {
match resp.json::<OpenAiModelsResponse>().await {
Ok(body) => {
det.installed = true;
det.models = body.data.into_iter().map(|m| m.id).collect();
eprintln!("[agent-detect] vllm ok — {} models", det.models.len());
}
Err(e) => {
eprintln!("[agent-detect] vllm parse error: {e}");
det.note = Some(format!("응답 파싱 실패: {e}"));
}
}
}
Ok(resp) => {
let status = resp.status();
eprintln!("[agent-detect] vllm status {}", status);
det.note = Some(format!("HTTP {status}"));
}
Err(e) => {
eprintln!("[agent-detect] vllm unreachable: {e}");
det.note = Some(if e.is_timeout() { "timeout".into() } else { "not reachable".into() });
}
}
det
}

// ─── Tauri command ───────────────────────────────────────────────────────────

#[tauri::command]
pub async fn detect_available_agents(
ollama_endpoint: Option<String>,
lmstudio_endpoint: Option<String>,
vllm_endpoint: Option<String>,
) -> Vec<AgentDetection> {
let ollama_ep = ollama_endpoint.unwrap_or_else(|| "http://localhost:11434".into());
let lmstudio_ep = lmstudio_endpoint.unwrap_or_else(|| "http://localhost:1234/v1".into());
let vllm_ep = vllm_endpoint.unwrap_or_else(|| {
std::env::var("VLLM_ENDPOINT").unwrap_or_else(|_| "http://localhost:8000".into())
});

// CLI probes — 병렬
let (claude, codex, gemini, ollama, lmstudio) = tokio::join!(
let (claude, codex, gemini, ollama, lmstudio, vllm) = tokio::join!(
probe_cli("claude", "claude", &["--version"]),
probe_cli("codex", "codex", &["--version"]),
probe_cli("gemini", "gemini", &["--version"]),
probe_ollama(&ollama_ep),
probe_lmstudio(&lmstudio_ep),
probe_vllm(&vllm_ep),
);

vec![claude, codex, gemini, ollama, lmstudio]
vec![claude, codex, gemini, ollama, lmstudio, vllm]
}
20 changes: 12 additions & 8 deletions src-tauri/src/commands/agents.rs
Original file line number Diff line number Diff line change
Expand Up @@ -498,9 +498,13 @@ pub async fn start_openai_compat_stream(
) -> Result<StartRunResult, AppError> {
let db = state.inner().clone();
let db_post = state.inner().clone();
let is_lmstudio = input.engine.as_deref() == Some("lmstudio");
let engine_label = if is_lmstudio { "lmstudio" } else { "ollama" };
eprintln!("[openai-compat] engine={:?} model={:?} is_lmstudio={}", input.engine, input.model, is_lmstudio);
let engine_key = input.engine.as_deref().unwrap_or("ollama");
let engine_label = match engine_key {
"lmstudio" => "lmstudio",
"vllm" => "vllm",
_ => "ollama",
};
eprintln!("[openai-compat] engine={:?} model={:?} engine_label={}", input.engine, input.model, engine_label);
let id_frag = identity_fragment(&input, engine_label);
let write_arc = db_write_arc(&state);
let cid = input.conversation_id.clone();
Expand Down Expand Up @@ -529,10 +533,10 @@ pub async fn start_openai_compat_stream(
.filter(|s| !s.is_empty())
.map(str::to_owned)
.unwrap_or_else(|| {
if is_lmstudio {
openai_compat::lmstudio_base_url()
} else {
std::env::var("OLLAMA_HOST").unwrap_or_else(|_| "http://localhost:11434".into())
match engine_label {
"lmstudio" => openai_compat::lmstudio_base_url(),
"vllm" => openai_compat::vllm_base_url(),
_ => std::env::var("OLLAMA_HOST").unwrap_or_else(|_| "http://localhost:11434".into()),
}
});

Expand Down Expand Up @@ -607,7 +611,7 @@ pub fn run_eval_agent(
"codex" => codex::run(run_input),
"gemini" => gemini::run(run_input),
"opencode" => opencode::run(run_input),
"ollama" => openai_compat::run(run_input),
"ollama" | "vllm" => openai_compat::run(run_input),
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

critical

In `run_eval_agent`, the evaluation path for `vllm` currently routes through `openai_compat::run(run_input)`, which is hardcoded to use `ollama_base_url()`. This causes vLLM evaluation requests to be sent to the Ollama endpoint instead. We should use `openai_compat::stream_run_with_base` with the correct `vllm_base_url()`, blocking on the current Tokio runtime handle.

        "ollama" => openai_compat::run(run_input),
        "vllm" => {
            let base_url = openai_compat::vllm_base_url();
            let rt = tokio::runtime::Handle::try_current()
                .map_err(|_| AppError::Agent("No tokio runtime available for vllm".into()))?;
            rt.block_on(async {
                openai_compat::stream_run_with_base(run_input, base_url, |_| {}, |_| {}).await
            })
        }

_ => claude::run(run_input),
};
let duration_ms = t0.elapsed().as_millis() as i64;
Expand Down
4 changes: 3 additions & 1 deletion src-tauri/src/commands/model_discovery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ fn fallback_models(engine: &str) -> Vec<(&'static str, &'static str, bool)> {
("phi-4:latest", "Phi-4", false),
],
"lmstudio" => vec![], // LM Studio models are always discovered live
"vllm" => vec![], // vLLM models are always discovered live
_ => vec![],
}
}
Expand Down Expand Up @@ -366,7 +367,7 @@ fn discover_lmstudio() -> Option<Vec<String>> {

// ─── Core API ───────────────────────────────────────────────────────────────

const ENGINES: &[&str] = &["claude", "codex", "gemini", "ollama", "lmstudio"];
const ENGINES: &[&str] = &["claude", "codex", "gemini", "ollama", "lmstudio", "vllm"];

fn get_models_for_engine(engine: &str, force: bool) -> (Vec<String>, String) {
// Check cache — invalidate early if the tracked binary's mtime has changed
Expand Down Expand Up @@ -398,6 +399,7 @@ fn get_models_for_engine(engine: &str, force: bool) -> (Vec<String>, String) {
},
"ollama" => (crate::agents::openai_compat::discover_models(), None),
"lmstudio" => (discover_lmstudio(), None),
"vllm" => (crate::agents::openai_compat::discover_vllm(), None),
_ => (None, None),
};

Expand Down
6 changes: 4 additions & 2 deletions src-tauri/src/commands/roundtable_helpers/executor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ pub async fn run_participant(
"gemini" => (gemini::run(run_input), "gemini"),
"opencode" => (opencode::run(run_input), "opencode"),
"ollama" => (openai_compat::run(run_input), "ollama"),
"vllm" => (openai_compat::run(run_input), "vllm"),
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

critical

The non-streaming fallback path in `run_participant` currently routes `vllm` through `openai_compat::run(run_input)`, which is hardcoded to use `ollama_base_url()` (typically `http://localhost:11434`). This causes vLLM roundtable requests to be sent to the Ollama endpoint instead. We should use `openai_compat::stream_run_with_base` with the correct `vllm_base_url()`, blocking on the current Tokio runtime handle.

            "vllm" => {
                let base_url = openai_compat::vllm_base_url();
                let res = tokio::runtime::Handle::try_current()
                    .map_err(|_| AppError::Agent("No tokio runtime available for vllm".into()))
                    .and_then(|rt| {
                        rt.block_on(async {
                            openai_compat::stream_run_with_base(run_input, base_url, |_| {}, |_| {}).await
                        })
                    });
                (res, "vllm")
            }

_ => (
Err(AppError::Agent(format!("unsupported engine: {}", engine_key_owned))),
"unknown",
Expand Down Expand Up @@ -168,7 +169,7 @@ pub(super) async fn stream_participant(
.await
.unwrap_or_else(|_| (Err(AppError::Agent("participant task panicked".into())), "unknown"))
}
"ollama" => {
"ollama" | "vllm" => {
let a = app.clone(); let mi = msg_id.clone(); let ci = conversation_id.clone();
let on_chunk = {
let a = a.clone(); let mi = mi.clone(); let ci = ci.clone();
Expand All @@ -179,7 +180,8 @@ pub(super) async fn stream_participant(
}
};
let on_progress = |_: String| {};
(openai_compat::stream_run(run_input, on_progress, on_chunk).await, "ollama")
let label = if engine_key_owned == "vllm" { "vllm" } else { "ollama" };
(openai_compat::stream_run(run_input, on_progress, on_chunk).await, label)
Comment on lines +183 to +184
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

critical

In `stream_participant`, the streaming path for `vllm` currently calls `openai_compat::stream_run(run_input, on_progress, on_chunk)`, which is hardcoded to use `ollama_base_url()`. This causes vLLM streaming roundtable requests to be sent to the Ollama endpoint instead. We should use `openai_compat::stream_run_with_base` with the correct base URL for the selected engine.

Suggested change
let label = if engine_key_owned == "vllm" { "vllm" } else { "ollama" };
(openai_compat::stream_run(run_input, on_progress, on_chunk).await, label)
let base_url = if engine_key_owned == "vllm" {
openai_compat::vllm_base_url()
} else {
std::env::var("OLLAMA_HOST").unwrap_or_else(|_| "http://localhost:11434".into())
};
let label = if engine_key_owned == "vllm" { "vllm" } else { "ollama" };
(openai_compat::stream_run_with_base(run_input, base_url, on_progress, on_chunk).await, label)

}
"opencode" => {
tokio::task::spawn_blocking(move || {
Expand Down
1 change: 1 addition & 0 deletions src/components/tunaflow/AgentAvatar.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ const ENGINE_ICONS: Record<string, string> = {
const ENGINE_INITIALS: Record<string, string> = {
ollama: "O",
lmstudio: "L",
vllm: "V",
};

interface AgentAvatarProps {
Expand Down
2 changes: 1 addition & 1 deletion src/components/tunaflow/CreateRoundtableDialog.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ const RT_MODES: { id: RtMode; label: string; desc: string }[] = [
{ id: "deliberative", label: "Deliberative", desc: "Round 1 independent, Round 2+ reflects on all" },
];

const ENGINES = ["claude", "codex", "gemini", "ollama", "lmstudio"] as const;
const ENGINES = ["claude", "codex", "gemini", "ollama", "lmstudio", "vllm"] as const;

interface CreateRoundtableDialogProps {
open: boolean;
Expand Down
27 changes: 19 additions & 8 deletions src/components/tunaflow/MetaAgentSelector.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ const ENGINE_META: Record<string, EngineMeta> = {
gemini: { label: "Gemini", installHintKey: "", installHint: "npm install -g @google/gemini-cli", docLink: "https://ai.google.dev/gemini-api/docs/cli" },
ollama: { label: "Ollama", installHintKey: "ollama_install_hint", defaultEndpoint: "http://localhost:11434" },
lmstudio: { label: "LM Studio", installHintKey: "lmstudio_install_hint", defaultEndpoint: "http://localhost:1234/v1" },
vllm: { label: "vLLM", installHintKey: "vllm_install_hint", defaultEndpoint: "http://localhost:8000" },
};

// CLI engines whose `models` list comes from the dynamic discovery store
Expand All @@ -60,6 +61,7 @@ export function MetaAgentSelector({ onProceed, onSkip, projectName }: Props) {
const [detections, setDetections] = useState<AgentDetection[] | null>(null);
const [ollamaEndpoint, setOllamaEndpoint] = useState("http://localhost:11434");
const [lmstudioEndpoint, setLmstudioEndpoint] = useState("http://localhost:1234/v1");
const [vllmEndpoint, setVllmEndpoint] = useState("http://localhost:8000");

const [selectedEngine, setSelectedEngine] = useState<string | null>(null);
const [modelByEngine, setModelByEngine] = useState<Record<string, string>>({});
Expand All @@ -86,12 +88,13 @@ export function MetaAgentSelector({ onProceed, onSkip, projectName }: Props) {
};

// Initial + on-endpoint-change detection
const runDetect = async (oEp: string, lEp: string) => {
const runDetect = async (oEp: string, lEp: string, vEp: string) => {
setDetections(null);
try {
const result = await invoke<AgentDetection[]>("detect_available_agents", {
ollamaEndpoint: oEp,
lmstudioEndpoint: lEp,
vllmEndpoint: vEp,
});
setDetections(result);
} catch (e) {
Expand All @@ -107,7 +110,7 @@ export function MetaAgentSelector({ onProceed, onSkip, projectName }: Props) {
if (engineModels.length === 0) {
loadEngineModels().catch((e) => console.warn("[meta-agent] loadEngineModels", e));
}
runDetect(ollamaEndpoint, lmstudioEndpoint);
runDetect(ollamaEndpoint, lmstudioEndpoint, vllmEndpoint);
// eslint-disable-next-line react-hooks/exhaustive-deps
}, []);

Expand All @@ -132,15 +135,17 @@ export function MetaAgentSelector({ onProceed, onSkip, projectName }: Props) {
});
}, [detections, engineModels]);

const onEndpointChange = (engine: "ollama" | "lmstudio", value: string) => {
const onEndpointChange = (engine: "ollama" | "lmstudio" | "vllm", value: string) => {
if (engine === "ollama") setOllamaEndpoint(value);
else setLmstudioEndpoint(value);
else if (engine === "lmstudio") setLmstudioEndpoint(value);
else setVllmEndpoint(value);

if (debounceRef.current) window.clearTimeout(debounceRef.current);
debounceRef.current = window.setTimeout(() => {
const o = engine === "ollama" ? value : ollamaEndpoint;
const l = engine === "lmstudio" ? value : lmstudioEndpoint;
runDetect(o, l);
const v = engine === "vllm" ? value : vllmEndpoint;
runDetect(o, l, v);
}, 600);
};

Expand Down Expand Up @@ -169,7 +174,11 @@ export function MetaAgentSelector({ onProceed, onSkip, projectName }: Props) {
if (!canProceed || !selectedEngine) return;
const det = detections!.find((d) => d.engine === selectedEngine)!;
const endpoint = det.kind === "http"
? (selectedEngine === "ollama" ? ollamaEndpoint : lmstudioEndpoint)
? (selectedEngine === "ollama"
? ollamaEndpoint
: selectedEngine === "lmstudio"
? lmstudioEndpoint
: vllmEndpoint)
: undefined;
onProceed({
engine: selectedEngine,
Expand Down Expand Up @@ -253,8 +262,8 @@ export function MetaAgentSelector({ onProceed, onSkip, projectName }: Props) {
<span className="text-[10px] text-muted-foreground/60 shrink-0">Endpoint</span>
<input
type="text"
value={d.engine === "ollama" ? ollamaEndpoint : lmstudioEndpoint}
onChange={(e) => onEndpointChange(d.engine as "ollama" | "lmstudio", e.target.value)}
value={d.engine === "ollama" ? ollamaEndpoint : d.engine === "lmstudio" ? lmstudioEndpoint : vllmEndpoint}
onChange={(e) => onEndpointChange(d.engine as "ollama" | "lmstudio" | "vllm", e.target.value)}
className="flex-1 text-[10px] font-mono bg-background border border-border/60 rounded px-2 py-1 focus:outline-none focus:border-primary/60"
/>
</div>
Expand Down Expand Up @@ -308,6 +317,8 @@ export function MetaAgentSelector({ onProceed, onSkip, projectName }: Props) {
? t("meta_agent.ollama_install_hint")
: meta.installHintKey === "lmstudio_install_hint"
? t("meta_agent.lmstudio_install_hint")
: meta.installHintKey === "vllm_install_hint"
? t("meta_agent.vllm_install_hint")
: meta.installHint}
</span>
</div>
Expand Down
Loading