From 879577bfc8d1891471816beb887bc782182052a5 Mon Sep 17 00:00:00 2001 From: Ankur Goyal Date: Tue, 2 Jun 2026 08:42:39 -0700 Subject: [PATCH] add streaming chunks and fix daytona default --- crates/cli/src/main.rs | 7 +-- crates/executor/src/basic_tests.rs | 25 +-------- crates/executor/src/executor_types.rs | 3 +- crates/executor/src/harness_basic_tests.rs | 16 +++--- crates/executor/src/harness_config.rs | 9 ++-- crates/executor/src/rlm_tests.rs | 12 ++--- crates/exoharness/src/basic.rs | 20 +++---- crates/exoharness/src/basic_tests.rs | 61 ++++++++++++++++++++-- crates/exoharness/src/http_tests.rs | 8 +-- crates/exoharness/src/types.rs | 12 +++-- typescript/harness/runner.ts | 4 +- 11 files changed, 101 insertions(+), 76 deletions(-) diff --git a/crates/cli/src/main.rs b/crates/cli/src/main.rs index c80795b..cb17f65 100644 --- a/crates/cli/src/main.rs +++ b/crates/cli/src/main.rs @@ -638,12 +638,7 @@ async fn main() -> Result<()> { .as_deref() .map(|env| env_value_from_arg("--bearer-env", env, &env_vars)) .transpose()?; - let using_remote_exoharness = cli.exoharness_url.is_some(); - let default_sandbox_provider = if using_remote_exoharness { - SandboxProvider::Daytona - } else { - default_local_sandbox_provider() - }; + let default_sandbox_provider = default_local_sandbox_provider(); let exoharness = instantiate_exoharness(&exo_config, cli.exoharness_url.as_deref(), bearer_token).await?; let harness_kind = determine_harness_kind( diff --git a/crates/executor/src/basic_tests.rs b/crates/executor/src/basic_tests.rs index dd68c3a..f78ba31 100644 --- a/crates/executor/src/basic_tests.rs +++ b/crates/executor/src/basic_tests.rs @@ -1119,28 +1119,7 @@ fn append_event( } fn event_type(event: &Event) -> String { - match &event.data { - EventData::ConversationCreated { .. } => "conversation_created".to_string(), - EventData::ConversationUpdated { .. } => "conversation_updated".to_string(), - EventData::ConversationDeleted => "conversation_deleted".to_string(), - EventData::ConversationForked { .. } => "conversation_forked".to_string(), - EventData::SessionStarted => "session_started".to_string(), - EventData::SessionEnded => "session_ended".to_string(), - EventData::TurnStarted => "turn_started".to_string(), - EventData::TurnEnded => "turn_ended".to_string(), - EventData::Messages { .. } => "messages".to_string(), - EventData::ToolRequested { .. } => "tool_requested".to_string(), - EventData::ToolResult { .. } => "tool_result".to_string(), - EventData::ArtifactWritten { .. } => "artifact_written".to_string(), - EventData::SandboxCreated { .. } => "sandbox_created".to_string(), - EventData::SandboxStarted { .. } => "sandbox_started".to_string(), - EventData::SandboxStopped { .. } => "sandbox_stopped".to_string(), - EventData::SandboxSnapshotted { .. } => "sandbox_snapshotted".to_string(), - EventData::SandboxProcessStarted { .. } => "sandbox_process_started".to_string(), - EventData::SandboxProcessStateUpdated { .. } => "sandbox_process_state_updated".to_string(), - EventData::SandboxProcessEvent { .. } => "sandbox_process_event".to_string(), - EventData::Custom { event_type, .. } => event_type.clone(), - } + event.data.kind().as_str().to_string() } fn user_message(text: &str) -> Message { @@ -1193,7 +1172,7 @@ fn default_agent_config() -> AgentConfig { typescript: None, enable_agent_tool_creation: true, sandbox_image: None, - sandbox_provider: Default::default(), + sandbox_provider: SandboxProvider::LocalProcess, enable_networking: false, model: "test-model".to_string(), max_output_tokens: None, diff --git a/crates/executor/src/executor_types.rs b/crates/executor/src/executor_types.rs index 5cc61a2..7de45ab 100644 --- a/crates/executor/src/executor_types.rs +++ b/crates/executor/src/executor_types.rs @@ -15,7 +15,7 @@ use tokio_stream::{Stream, wrappers::UnboundedReceiverStream}; use crate::braintrust::BraintrustTracingConfig; -#[derive(Debug, Clone, Default, Serialize, serde::Deserialize)] +#[derive(Debug, Clone, Serialize, serde::Deserialize)] pub struct AgentConfig { pub instructions: Vec, #[serde(default)] @@ -26,7 +26,6 @@ pub struct AgentConfig { pub enable_agent_tool_creation: bool, #[serde(default)] pub sandbox_image: Option, - #[serde(default)] pub sandbox_provider: SandboxProvider, #[serde(default)] pub enable_networking: bool, diff --git a/crates/executor/src/harness_basic_tests.rs b/crates/executor/src/harness_basic_tests.rs index 81adf49..4ae4ab0 100644 --- a/crates/executor/src/harness_basic_tests.rs +++ b/crates/executor/src/harness_basic_tests.rs @@ -126,7 +126,7 @@ async fn send_persists_messages_through_harness() { typescript: None, enable_agent_tool_creation: true, sandbox_image: None, - sandbox_provider: Default::default(), + sandbox_provider: SandboxProvider::LocalProcess, enable_networking: false, model: "gpt-5.4".to_string(), max_output_tokens: None, @@ -200,7 +200,7 @@ async fn close_session_appends_session_ended_event() { typescript: None, enable_agent_tool_creation: true, sandbox_image: None, - sandbox_provider: Default::default(), + sandbox_provider: SandboxProvider::LocalProcess, enable_networking: false, model: "gpt-5.4".to_string(), max_output_tokens: None, @@ -281,7 +281,7 @@ async fn updating_agent_config_refreshes_executor_cache() { typescript: None, enable_agent_tool_creation: true, sandbox_image: None, - sandbox_provider: Default::default(), + sandbox_provider: SandboxProvider::LocalProcess, enable_networking: false, model: "gpt-5.4".to_string(), max_output_tokens: None, @@ -363,7 +363,7 @@ async fn send_executes_shell_tool_when_enabled() { typescript: None, enable_agent_tool_creation: true, sandbox_image: Some("agent-image".to_string()), - sandbox_provider: Default::default(), + sandbox_provider: SandboxProvider::LocalProcess, enable_networking: true, model: "gpt-5.4".to_string(), max_output_tokens: None, @@ -457,7 +457,7 @@ async fn harness_exposes_raw_exoharness_handles() { typescript: None, enable_agent_tool_creation: true, sandbox_image: None, - sandbox_provider: Default::default(), + sandbox_provider: SandboxProvider::LocalProcess, enable_networking: false, model: "gpt-5.4".to_string(), max_output_tokens: None, @@ -562,7 +562,7 @@ async fn updating_mounts_recreates_shell_sandbox() { typescript: None, enable_agent_tool_creation: true, sandbox_image: None, - sandbox_provider: Default::default(), + sandbox_provider: SandboxProvider::LocalProcess, enable_networking: false, model: "gpt-5.4".to_string(), max_output_tokens: None, @@ -674,7 +674,7 @@ async fn updating_sandbox_image_recreates_shell_sandbox_without_shell_program() typescript: None, enable_agent_tool_creation: true, sandbox_image: None, - sandbox_provider: Default::default(), + sandbox_provider: SandboxProvider::LocalProcess, enable_networking: false, model: "gpt-5.4".to_string(), max_output_tokens: None, @@ -786,7 +786,7 @@ async fn conversation_model_override_changes_effective_model() { typescript: None, enable_agent_tool_creation: true, sandbox_image: None, - sandbox_provider: Default::default(), + sandbox_provider: SandboxProvider::LocalProcess, enable_networking: false, model: "gpt-5.4".to_string(), max_output_tokens: Some(512), diff --git a/crates/executor/src/harness_config.rs b/crates/executor/src/harness_config.rs index e7ac7e6..97a818a 100644 --- a/crates/executor/src/harness_config.rs +++ b/crates/executor/src/harness_config.rs @@ -1,3 +1,4 @@ +use anyhow::anyhow; use exoharness::{ AgentHandle, Artifact, ArtifactVersion, ConversationHandle, ReadArtifactRequest, Result, WriteArtifactRequest, @@ -11,11 +12,9 @@ pub(crate) const AGENT_CONFIG_ARTIFACT_PATH: &str = "config/executor.json"; pub(crate) const CONVERSATION_CONFIG_ARTIFACT_PATH: &str = "config/executor.json"; pub async fn load_agent_config(agent: &dyn AgentHandle) -> Result { - Ok( - read_json_artifact_from_agent(agent, AGENT_CONFIG_ARTIFACT_PATH) - .await? - .unwrap_or_default(), - ) + read_json_artifact_from_agent(agent, AGENT_CONFIG_ARTIFACT_PATH) + .await? + .ok_or_else(|| anyhow!("missing agent config artifact at {AGENT_CONFIG_ARTIFACT_PATH}")) } pub async fn store_agent_config(agent: &dyn AgentHandle, config: &AgentConfig) -> Result<()> { diff --git a/crates/executor/src/rlm_tests.rs b/crates/executor/src/rlm_tests.rs index b1efe01..bfddf63 100644 --- a/crates/executor/src/rlm_tests.rs +++ b/crates/executor/src/rlm_tests.rs @@ -9,7 +9,7 @@ use anyhow::{anyhow, bail}; use async_trait::async_trait; use exoharness::{ BasicExoHarness, Binding, EventData, EventQuery, EventQueryDirection, ExoHarness, - PutSecretRequest, Secret, ToolRequest, Uuid7, + PutSecretRequest, SandboxProvider, Secret, ToolRequest, Uuid7, }; use lingua::universal::{AssistantContent, UserContent}; use lingua::{Message, UniversalStreamChunk}; @@ -65,7 +65,7 @@ async fn rlm_send_executes_repl_steps_and_persists_final_answer() { typescript: None, enable_agent_tool_creation: true, sandbox_image: None, - sandbox_provider: Default::default(), + sandbox_provider: SandboxProvider::LocalProcess, enable_networking: false, model: "gpt-5.4".to_string(), max_output_tokens: Some(512), @@ -193,7 +193,7 @@ async fn rlm_subquery_variable_can_store_final_answer() { typescript: None, enable_agent_tool_creation: true, sandbox_image: None, - sandbox_provider: Default::default(), + sandbox_provider: SandboxProvider::LocalProcess, enable_networking: false, model: "gpt-5.4".to_string(), max_output_tokens: Some(512), @@ -259,7 +259,7 @@ async fn rlm_send_stream_suppresses_internal_control_text() { typescript: None, enable_agent_tool_creation: true, sandbox_image: None, - sandbox_provider: Default::default(), + sandbox_provider: SandboxProvider::LocalProcess, enable_networking: false, model: "gpt-5.4".to_string(), max_output_tokens: Some(512), @@ -362,7 +362,7 @@ globalThis.answer = String(\n\ typescript: None, enable_agent_tool_creation: true, sandbox_image: None, - sandbox_provider: Default::default(), + sandbox_provider: SandboxProvider::LocalProcess, enable_networking: false, model: "gpt-5.4".to_string(), max_output_tokens: Some(512), @@ -433,7 +433,7 @@ async fn rlm_can_finish_by_setting_final_in_repl() { typescript: None, enable_agent_tool_creation: true, sandbox_image: None, - sandbox_provider: Default::default(), + sandbox_provider: SandboxProvider::LocalProcess, enable_networking: false, model: "gpt-5.4".to_string(), max_output_tokens: Some(512), diff --git a/crates/exoharness/src/basic.rs b/crates/exoharness/src/basic.rs index fce19c3..e2cdebf 100644 --- a/crates/exoharness/src/basic.rs +++ b/crates/exoharness/src/basic.rs @@ -90,7 +90,7 @@ impl BasicExoHarnessInner { fn sandbox_backend_for_provider( &self, provider: SandboxProvider, - ) -> Arc { + ) -> Result> { if matches!( (self.sandbox_backend_choice, provider), ( @@ -101,19 +101,20 @@ impl BasicExoHarnessInner { SandboxBackendChoice::LocalProcess, SandboxProvider::LocalProcess ) - | (_, SandboxProvider::Daytona) ) { - return Arc::clone(&self.sandbox_backend); + return Ok(Arc::clone(&self.sandbox_backend)); } - match provider { + Ok(match provider { SandboxProvider::AppleContainer => { Arc::new(CliContainerSandboxBackend::apple_container()) } SandboxProvider::Docker => Arc::new(CliContainerSandboxBackend::docker()), SandboxProvider::LocalProcess => Arc::new(LocalProcessSandboxBackend::new()), - SandboxProvider::Daytona => Arc::clone(&self.sandbox_backend), - } + SandboxProvider::Daytona => { + bail!("daytona sandbox provider is not supported by BasicExoHarness") + } + }) } } @@ -658,7 +659,7 @@ impl BasicConversationHandle { let handle = self .harness .inner - .sandbox_backend_for_provider(sandbox.provider) + .sandbox_backend_for_provider(sandbox.provider)? .acquire(sandbox_request(self.record.id, sandbox_id, sandbox)) .await?; self.harness @@ -1032,7 +1033,7 @@ impl ConversationHandle for BasicConversationHandle { let sandbox_handle = self .harness .inner - .sandbox_backend_for_provider(metadata.provider) + .sandbox_backend_for_provider(metadata.provider)? .acquire(sandbox_request(self.record.id, &sandbox_id, &metadata)) .await?; self.harness @@ -1187,7 +1188,7 @@ impl ConversationHandle for BasicConversationHandle { let sandbox_handle = self .harness .inner - .sandbox_backend_for_provider(sandbox.provider) + .sandbox_backend_for_provider(sandbox.provider)? .acquire_from_snapshot( sandbox_request(self.record.id, &request.id, &sandbox), payload, @@ -1878,7 +1879,6 @@ struct StoredArtifactMetadata { #[derive(Debug, Clone, Serialize, Deserialize)] struct StoredSandbox { id: SandboxId, - #[serde(default)] provider: SandboxProvider, image: String, default_workdir: Option, diff --git a/crates/exoharness/src/basic_tests.rs b/crates/exoharness/src/basic_tests.rs index 11f6202..0d204fd 100644 --- a/crates/exoharness/src/basic_tests.rs +++ b/crates/exoharness/src/basic_tests.rs @@ -23,8 +23,8 @@ use crate::{ ManagedSandboxHandle, NewAgentRequest, NewConversationRequest, PutSecretRequest, RunInSandboxRequest, SandboxCommand, SandboxCommandOutput, SandboxProcessEvent, SandboxProcessEventQuery, SandboxProcessParts, SandboxProcessStatus, SandboxProcessStdin, - SandboxRequest, Secret, SnapshotPayload, StartSandboxProcessRequest, WaitSandboxProcessRequest, - WriteArtifactRequest, WriteSandboxProcessInputRequest, + SandboxProvider, SandboxRequest, Secret, SnapshotPayload, StartSandboxProcessRequest, + WaitSandboxProcessRequest, WriteArtifactRequest, WriteSandboxProcessInputRequest, }; #[tokio::test(flavor = "current_thread")] @@ -511,7 +511,7 @@ async fn basic_backend_runs_commands_in_created_sandbox() { let sandbox_id = conversation .create_sandbox(CreateSandboxRequest { - provider: Default::default(), + provider: SandboxProvider::LocalProcess, image: "basic-local-process".to_string(), default_workdir: Some(tempdir.path().display().to_string()), file_system_mounts: None, @@ -569,7 +569,7 @@ async fn basic_backend_exposes_process_events_and_input() { .expect("conversation"); let sandbox_id = conversation .create_sandbox(CreateSandboxRequest { - provider: Default::default(), + provider: SandboxProvider::LocalProcess, image: "basic-local-process".to_string(), default_workdir: Some(tempdir.path().display().to_string()), file_system_mounts: None, @@ -818,7 +818,7 @@ async fn test_conversation(harness: &BasicExoHarness) -> Arc) -> String { conversation .create_sandbox(CreateSandboxRequest { - provider: Default::default(), + provider: SandboxProvider::LocalProcess, image: "test-sandbox".to_string(), default_workdir: Some("/".to_string()), file_system_mounts: None, @@ -829,6 +829,57 @@ async fn test_sandbox(conversation: &Arc) -> Stri .expect("sandbox should be created") } +#[test] +fn create_sandbox_request_requires_provider() { + let error = serde_json::from_value::(serde_json::json!({ + "image": "test-sandbox", + "default_workdir": "/", + "file_system_mounts": null, + "enable_networking": true, + "idle_seconds": 60, + })) + .expect_err("provider should be required"); + + assert!(error.to_string().contains("missing field `provider`")); +} + +#[tokio::test(flavor = "current_thread")] +async fn basic_backend_rejects_daytona_provider() { + let tempdir = TempDir::new().expect("tempdir"); + let harness = BasicExoHarness::new(local_test_config(tempdir.path())) + .await + .expect("harness should initialize"); + let agent = harness + .new_agent(NewAgentRequest { + slug: "agent".to_string(), + name: "Agent".to_string(), + }) + .await + .expect("agent"); + let conversation = agent + .new_conversation(NewConversationRequest::default()) + .await + .expect("conversation"); + + let error = conversation + .create_sandbox(CreateSandboxRequest { + provider: SandboxProvider::Daytona, + image: "test-sandbox".to_string(), + default_workdir: Some("/".to_string()), + file_system_mounts: None, + enable_networking: Some(true), + idle_seconds: Some(60), + }) + .await + .expect_err("daytona should not be handled by BasicExoHarness"); + + assert!( + error + .to_string() + .contains("daytona sandbox provider is not supported") + ); +} + struct TestSandboxBackend { process: Arc>>, } diff --git a/crates/exoharness/src/http_tests.rs b/crates/exoharness/src/http_tests.rs index f0b3a8b..c994eab 100644 --- a/crates/exoharness/src/http_tests.rs +++ b/crates/exoharness/src/http_tests.rs @@ -8,8 +8,8 @@ use crate::test_support::local_test_config; use crate::{ BasicExoHarness, CreateSandboxRequest, ExoHarness, HttpExoHarness, RunInSandboxRequest, SandboxProcessEvent, SandboxProcessEventQuery, SandboxProcessStatus, SandboxProcessStdin, - StartSandboxProcessRequest, WaitSandboxProcessRequest, WriteSandboxProcessInputRequest, - serve_exoharness_http_listener, + SandboxProvider, StartSandboxProcessRequest, WaitSandboxProcessRequest, + WriteSandboxProcessInputRequest, serve_exoharness_http_listener, }; struct HttpHarnessFixture { @@ -97,7 +97,7 @@ async fn http_exoharness_runs_noninteractive_sandbox_commands() { .expect("conversation"); let sandbox_id = conversation .create_sandbox(CreateSandboxRequest { - provider: Default::default(), + provider: SandboxProvider::LocalProcess, image: "local".to_string(), default_workdir: Some("/".to_string()), file_system_mounts: None, @@ -143,7 +143,7 @@ async fn http_exoharness_supports_sandbox_process_events() { .expect("conversation"); let sandbox_id = conversation .create_sandbox(CreateSandboxRequest { - provider: Default::default(), + provider: SandboxProvider::LocalProcess, image: "local".to_string(), default_workdir: Some("/".to_string()), file_system_mounts: None, diff --git a/crates/exoharness/src/types.rs b/crates/exoharness/src/types.rs index 40e23cd..e20d6c4 100644 --- a/crates/exoharness/src/types.rs +++ b/crates/exoharness/src/types.rs @@ -9,7 +9,7 @@ use chrono::{DateTime, Utc}; use futures::Stream; use futures::future::BoxFuture; use futures::io::{AsyncRead, AsyncWrite}; -use lingua::Message; +use lingua::{Message, universal::UniversalStreamChunk}; use serde::{Deserialize, Serialize}; use serde_json::{Map, Value}; @@ -204,6 +204,7 @@ impl EventKind { pub const MESSAGES: EventKind = EventKind(Cow::Borrowed("messages")); pub const TOOL_REQUESTED: EventKind = EventKind(Cow::Borrowed("tool_requested")); pub const TOOL_RESULT: EventKind = EventKind(Cow::Borrowed("tool_result")); + pub const LINGUA_STREAM_CHUNK: EventKind = EventKind(Cow::Borrowed("lingua_stream_chunk")); pub const ARTIFACT_WRITTEN: EventKind = EventKind(Cow::Borrowed("artifact_written")); pub const SANDBOX_CREATED: EventKind = EventKind(Cow::Borrowed("sandbox_created")); pub const SANDBOX_STARTED: EventKind = EventKind(Cow::Borrowed("sandbox_started")); @@ -301,6 +302,9 @@ pub enum EventData { tool_call_id: ToolCallId, result: ToolResult, }, + LinguaStreamChunk { + chunk: UniversalStreamChunk, + }, ArtifactWritten { artifact_id: ArtifactId, path: String, @@ -308,7 +312,6 @@ pub enum EventData { }, SandboxCreated { sandbox_id: SandboxId, - #[serde(default)] provider: SandboxProvider, image: String, default_workdir: String, @@ -372,6 +375,7 @@ impl EventData { Self::Messages { .. } => EventKind::MESSAGES, Self::ToolRequested { .. } => EventKind::TOOL_REQUESTED, Self::ToolResult { .. } => EventKind::TOOL_RESULT, + Self::LinguaStreamChunk { .. } => EventKind::LINGUA_STREAM_CHUNK, Self::ArtifactWritten { .. } => EventKind::ARTIFACT_WRITTEN, Self::SandboxCreated { .. } => EventKind::SANDBOX_CREATED, Self::SandboxStarted { .. } => EventKind::SANDBOX_STARTED, @@ -437,7 +441,6 @@ pub struct FileSystemMount { #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] pub struct CreateSandboxRequest { - #[serde(default)] pub provider: SandboxProvider, pub image: String, pub default_workdir: Option, @@ -446,10 +449,9 @@ pub struct CreateSandboxRequest { pub idle_seconds: Option, } -#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] #[serde(rename_all = "snake_case")] pub enum SandboxProvider { - #[default] Daytona, AppleContainer, Docker, diff --git a/typescript/harness/runner.ts b/typescript/harness/runner.ts index 9f8149c..732e575 100644 --- a/typescript/harness/runner.ts +++ b/typescript/harness/runner.ts @@ -50,7 +50,7 @@ interface RawAgentConfig { } | null; enable_agent_tool_creation?: boolean; sandbox_image?: string | null; - sandbox_provider?: "daytona" | "apple_container" | "docker" | "local_process"; + sandbox_provider: "daytona" | "apple_container" | "docker" | "local_process"; enable_networking: boolean; model: string; max_output_tokens?: number | null; @@ -829,7 +829,7 @@ function toAgentConfig(raw: RawAgentConfig): AgentConfig { : null, enableAgentToolCreation: raw.enable_agent_tool_creation ?? true, sandboxImage: raw.sandbox_image ?? null, - sandboxProvider: raw.sandbox_provider ?? "daytona", + sandboxProvider: raw.sandbox_provider, enableNetworking: raw.enable_networking, model: raw.model, maxOutputTokens: raw.max_output_tokens ?? null,