-
Notifications
You must be signed in to change notification settings - Fork 17
feat(engines): add vLLM as 6th UI-connected engine — RT, meta agent, … #296
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -498,9 +498,13 @@ pub async fn start_openai_compat_stream( | |
| ) -> Result<StartRunResult, AppError> { | ||
| let db = state.inner().clone(); | ||
| let db_post = state.inner().clone(); | ||
| let is_lmstudio = input.engine.as_deref() == Some("lmstudio"); | ||
| let engine_label = if is_lmstudio { "lmstudio" } else { "ollama" }; | ||
| eprintln!("[openai-compat] engine={:?} model={:?} is_lmstudio={}", input.engine, input.model, is_lmstudio); | ||
| let engine_key = input.engine.as_deref().unwrap_or("ollama"); | ||
| let engine_label = match engine_key { | ||
| "lmstudio" => "lmstudio", | ||
| "vllm" => "vllm", | ||
| _ => "ollama", | ||
| }; | ||
| eprintln!("[openai-compat] engine={:?} model={:?} engine_label={}", input.engine, input.model, engine_label); | ||
| let id_frag = identity_fragment(&input, engine_label); | ||
| let write_arc = db_write_arc(&state); | ||
| let cid = input.conversation_id.clone(); | ||
|
|
@@ -529,10 +533,10 @@ pub async fn start_openai_compat_stream( | |
| .filter(|s| !s.is_empty()) | ||
| .map(str::to_owned) | ||
| .unwrap_or_else(|| { | ||
| if is_lmstudio { | ||
| openai_compat::lmstudio_base_url() | ||
| } else { | ||
| std::env::var("OLLAMA_HOST").unwrap_or_else(|_| "http://localhost:11434".into()) | ||
| match engine_label { | ||
| "lmstudio" => openai_compat::lmstudio_base_url(), | ||
| "vllm" => openai_compat::vllm_base_url(), | ||
| _ => std::env::var("OLLAMA_HOST").unwrap_or_else(|_| "http://localhost:11434".into()), | ||
| } | ||
| }); | ||
|
|
||
|
|
@@ -607,7 +611,7 @@ pub fn run_eval_agent( | |
| "codex" => codex::run(run_input), | ||
| "gemini" => gemini::run(run_input), | ||
| "opencode" => opencode::run(run_input), | ||
| "ollama" => openai_compat::run(run_input), | ||
| "ollama" | "vllm" => openai_compat::run(run_input), | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. In "ollama" => openai_compat::run(run_input),
"vllm" => {
let base_url = openai_compat::vllm_base_url();
let rt = tokio::runtime::Handle::try_current()
.map_err(|_| AppError::Agent("No tokio runtime available for vllm".into()))?;
rt.block_on(async {
openai_compat::stream_run_with_base(run_input, base_url, |_| {}, |_| {}).await
})
} |
||
| _ => claude::run(run_input), | ||
| }; | ||
| let duration_ms = t0.elapsed().as_millis() as i64; | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -52,6 +52,7 @@ pub async fn run_participant( | |||||||||||||||||||
| "gemini" => (gemini::run(run_input), "gemini"), | ||||||||||||||||||||
| "opencode" => (opencode::run(run_input), "opencode"), | ||||||||||||||||||||
| "ollama" => (openai_compat::run(run_input), "ollama"), | ||||||||||||||||||||
| "vllm" => (openai_compat::run(run_input), "vllm"), | ||||||||||||||||||||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The fallback non-streaming "vllm" => {
let base_url = openai_compat::vllm_base_url();
let res = tokio::runtime::Handle::try_current()
.map_err(|_| AppError::Agent("No tokio runtime available for vllm".into()))
.and_then(|rt| {
rt.block_on(async {
openai_compat::stream_run_with_base(run_input, base_url, |_| {}, |_| {}).await
})
});
(res, "vllm")
} |
||||||||||||||||||||
| _ => ( | ||||||||||||||||||||
| Err(AppError::Agent(format!("unsupported engine: {}", engine_key_owned))), | ||||||||||||||||||||
| "unknown", | ||||||||||||||||||||
|
|
@@ -168,7 +169,7 @@ pub(super) async fn stream_participant( | |||||||||||||||||||
| .await | ||||||||||||||||||||
| .unwrap_or_else(|_| (Err(AppError::Agent("participant task panicked".into())), "unknown")) | ||||||||||||||||||||
| } | ||||||||||||||||||||
| "ollama" => { | ||||||||||||||||||||
| "ollama" | "vllm" => { | ||||||||||||||||||||
| let a = app.clone(); let mi = msg_id.clone(); let ci = conversation_id.clone(); | ||||||||||||||||||||
| let on_chunk = { | ||||||||||||||||||||
| let a = a.clone(); let mi = mi.clone(); let ci = ci.clone(); | ||||||||||||||||||||
|
|
@@ -179,7 +180,8 @@ pub(super) async fn stream_participant( | |||||||||||||||||||
| } | ||||||||||||||||||||
| }; | ||||||||||||||||||||
| let on_progress = |_: String| {}; | ||||||||||||||||||||
| (openai_compat::stream_run(run_input, on_progress, on_chunk).await, "ollama") | ||||||||||||||||||||
| let label = if engine_key_owned == "vllm" { "vllm" } else { "ollama" }; | ||||||||||||||||||||
| (openai_compat::stream_run(run_input, on_progress, on_chunk).await, label) | ||||||||||||||||||||
|
Comment on lines
+183
to
+184
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. In
Suggested change
|
||||||||||||||||||||
| } | ||||||||||||||||||||
| "opencode" => { | ||||||||||||||||||||
| tokio::task::spawn_blocking(move || { | ||||||||||||||||||||
|
|
||||||||||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The `probe_vllm` function does not include the `Authorization` header with `VLLM_API_KEY` when sending the probe request. If the vLLM instance requires authentication (which is common for shared or cloud-hosted instances), the detection probe will fail with a `401 Unauthorized` status, even if the key is configured in the environment. Adding the `Authorization` header ensures that authenticated vLLM instances are correctly detected during onboarding.