Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 1 addition & 6 deletions crates/cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -638,12 +638,7 @@ async fn main() -> Result<()> {
.as_deref()
.map(|env| env_value_from_arg("--bearer-env", env, &env_vars))
.transpose()?;
let using_remote_exoharness = cli.exoharness_url.is_some();
let default_sandbox_provider = if using_remote_exoharness {
SandboxProvider::Daytona
} else {
default_local_sandbox_provider()
};
let default_sandbox_provider = default_local_sandbox_provider();
let exoharness =
instantiate_exoharness(&exo_config, cli.exoharness_url.as_deref(), bearer_token).await?;
let harness_kind = determine_harness_kind(
Expand Down
25 changes: 2 additions & 23 deletions crates/executor/src/basic_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1119,28 +1119,7 @@ fn append_event(
}

fn event_type(event: &Event) -> String {
match &event.data {
EventData::ConversationCreated { .. } => "conversation_created".to_string(),
EventData::ConversationUpdated { .. } => "conversation_updated".to_string(),
EventData::ConversationDeleted => "conversation_deleted".to_string(),
EventData::ConversationForked { .. } => "conversation_forked".to_string(),
EventData::SessionStarted => "session_started".to_string(),
EventData::SessionEnded => "session_ended".to_string(),
EventData::TurnStarted => "turn_started".to_string(),
EventData::TurnEnded => "turn_ended".to_string(),
EventData::Messages { .. } => "messages".to_string(),
EventData::ToolRequested { .. } => "tool_requested".to_string(),
EventData::ToolResult { .. } => "tool_result".to_string(),
EventData::ArtifactWritten { .. } => "artifact_written".to_string(),
EventData::SandboxCreated { .. } => "sandbox_created".to_string(),
EventData::SandboxStarted { .. } => "sandbox_started".to_string(),
EventData::SandboxStopped { .. } => "sandbox_stopped".to_string(),
EventData::SandboxSnapshotted { .. } => "sandbox_snapshotted".to_string(),
EventData::SandboxProcessStarted { .. } => "sandbox_process_started".to_string(),
EventData::SandboxProcessStateUpdated { .. } => "sandbox_process_state_updated".to_string(),
EventData::SandboxProcessEvent { .. } => "sandbox_process_event".to_string(),
EventData::Custom { event_type, .. } => event_type.clone(),
}
event.data.kind().as_str().to_string()
}

fn user_message(text: &str) -> Message {
Expand Down Expand Up @@ -1193,7 +1172,7 @@ fn default_agent_config() -> AgentConfig {
typescript: None,
enable_agent_tool_creation: true,
sandbox_image: None,
sandbox_provider: Default::default(),
sandbox_provider: SandboxProvider::LocalProcess,
enable_networking: false,
model: "test-model".to_string(),
max_output_tokens: None,
Expand Down
3 changes: 1 addition & 2 deletions crates/executor/src/executor_types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ use tokio_stream::{Stream, wrappers::UnboundedReceiverStream};

use crate::braintrust::BraintrustTracingConfig;

#[derive(Debug, Clone, Default, Serialize, serde::Deserialize)]
#[derive(Debug, Clone, Serialize, serde::Deserialize)]
pub struct AgentConfig {
pub instructions: Vec<Message>,
#[serde(default)]
Expand All @@ -26,7 +26,6 @@ pub struct AgentConfig {
pub enable_agent_tool_creation: bool,
#[serde(default)]
pub sandbox_image: Option<String>,
#[serde(default)]
pub sandbox_provider: SandboxProvider,
#[serde(default)]
pub enable_networking: bool,
Expand Down
16 changes: 8 additions & 8 deletions crates/executor/src/harness_basic_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ async fn send_persists_messages_through_harness() {
typescript: None,
enable_agent_tool_creation: true,
sandbox_image: None,
sandbox_provider: Default::default(),
sandbox_provider: SandboxProvider::LocalProcess,
enable_networking: false,
model: "gpt-5.4".to_string(),
max_output_tokens: None,
Expand Down Expand Up @@ -200,7 +200,7 @@ async fn close_session_appends_session_ended_event() {
typescript: None,
enable_agent_tool_creation: true,
sandbox_image: None,
sandbox_provider: Default::default(),
sandbox_provider: SandboxProvider::LocalProcess,
enable_networking: false,
model: "gpt-5.4".to_string(),
max_output_tokens: None,
Expand Down Expand Up @@ -281,7 +281,7 @@ async fn updating_agent_config_refreshes_executor_cache() {
typescript: None,
enable_agent_tool_creation: true,
sandbox_image: None,
sandbox_provider: Default::default(),
sandbox_provider: SandboxProvider::LocalProcess,
enable_networking: false,
model: "gpt-5.4".to_string(),
max_output_tokens: None,
Expand Down Expand Up @@ -363,7 +363,7 @@ async fn send_executes_shell_tool_when_enabled() {
typescript: None,
enable_agent_tool_creation: true,
sandbox_image: Some("agent-image".to_string()),
sandbox_provider: Default::default(),
sandbox_provider: SandboxProvider::LocalProcess,
enable_networking: true,
model: "gpt-5.4".to_string(),
max_output_tokens: None,
Expand Down Expand Up @@ -457,7 +457,7 @@ async fn harness_exposes_raw_exoharness_handles() {
typescript: None,
enable_agent_tool_creation: true,
sandbox_image: None,
sandbox_provider: Default::default(),
sandbox_provider: SandboxProvider::LocalProcess,
enable_networking: false,
model: "gpt-5.4".to_string(),
max_output_tokens: None,
Expand Down Expand Up @@ -562,7 +562,7 @@ async fn updating_mounts_recreates_shell_sandbox() {
typescript: None,
enable_agent_tool_creation: true,
sandbox_image: None,
sandbox_provider: Default::default(),
sandbox_provider: SandboxProvider::LocalProcess,
enable_networking: false,
model: "gpt-5.4".to_string(),
max_output_tokens: None,
Expand Down Expand Up @@ -674,7 +674,7 @@ async fn updating_sandbox_image_recreates_shell_sandbox_without_shell_program()
typescript: None,
enable_agent_tool_creation: true,
sandbox_image: None,
sandbox_provider: Default::default(),
sandbox_provider: SandboxProvider::LocalProcess,
enable_networking: false,
model: "gpt-5.4".to_string(),
max_output_tokens: None,
Expand Down Expand Up @@ -786,7 +786,7 @@ async fn conversation_model_override_changes_effective_model() {
typescript: None,
enable_agent_tool_creation: true,
sandbox_image: None,
sandbox_provider: Default::default(),
sandbox_provider: SandboxProvider::LocalProcess,
enable_networking: false,
model: "gpt-5.4".to_string(),
max_output_tokens: Some(512),
Expand Down
9 changes: 4 additions & 5 deletions crates/executor/src/harness_config.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use anyhow::anyhow;
use exoharness::{
AgentHandle, Artifact, ArtifactVersion, ConversationHandle, ReadArtifactRequest, Result,
WriteArtifactRequest,
Expand All @@ -11,11 +12,9 @@ pub(crate) const AGENT_CONFIG_ARTIFACT_PATH: &str = "config/executor.json";
pub(crate) const CONVERSATION_CONFIG_ARTIFACT_PATH: &str = "config/executor.json";

pub async fn load_agent_config(agent: &dyn AgentHandle) -> Result<AgentConfig> {
Ok(
read_json_artifact_from_agent(agent, AGENT_CONFIG_ARTIFACT_PATH)
.await?
.unwrap_or_default(),
)
read_json_artifact_from_agent(agent, AGENT_CONFIG_ARTIFACT_PATH)
.await?
.ok_or_else(|| anyhow!("missing agent config artifact at {AGENT_CONFIG_ARTIFACT_PATH}"))
}

pub async fn store_agent_config(agent: &dyn AgentHandle, config: &AgentConfig) -> Result<()> {
Expand Down
12 changes: 6 additions & 6 deletions crates/executor/src/rlm_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use anyhow::{anyhow, bail};
use async_trait::async_trait;
use exoharness::{
BasicExoHarness, Binding, EventData, EventQuery, EventQueryDirection, ExoHarness,
PutSecretRequest, Secret, ToolRequest, Uuid7,
PutSecretRequest, SandboxProvider, Secret, ToolRequest, Uuid7,
};
use lingua::universal::{AssistantContent, UserContent};
use lingua::{Message, UniversalStreamChunk};
Expand Down Expand Up @@ -65,7 +65,7 @@ async fn rlm_send_executes_repl_steps_and_persists_final_answer() {
typescript: None,
enable_agent_tool_creation: true,
sandbox_image: None,
sandbox_provider: Default::default(),
sandbox_provider: SandboxProvider::LocalProcess,
enable_networking: false,
model: "gpt-5.4".to_string(),
max_output_tokens: Some(512),
Expand Down Expand Up @@ -193,7 +193,7 @@ async fn rlm_subquery_variable_can_store_final_answer() {
typescript: None,
enable_agent_tool_creation: true,
sandbox_image: None,
sandbox_provider: Default::default(),
sandbox_provider: SandboxProvider::LocalProcess,
enable_networking: false,
model: "gpt-5.4".to_string(),
max_output_tokens: Some(512),
Expand Down Expand Up @@ -259,7 +259,7 @@ async fn rlm_send_stream_suppresses_internal_control_text() {
typescript: None,
enable_agent_tool_creation: true,
sandbox_image: None,
sandbox_provider: Default::default(),
sandbox_provider: SandboxProvider::LocalProcess,
enable_networking: false,
model: "gpt-5.4".to_string(),
max_output_tokens: Some(512),
Expand Down Expand Up @@ -362,7 +362,7 @@ globalThis.answer = String(\n\
typescript: None,
enable_agent_tool_creation: true,
sandbox_image: None,
sandbox_provider: Default::default(),
sandbox_provider: SandboxProvider::LocalProcess,
enable_networking: false,
model: "gpt-5.4".to_string(),
max_output_tokens: Some(512),
Expand Down Expand Up @@ -433,7 +433,7 @@ async fn rlm_can_finish_by_setting_final_in_repl() {
typescript: None,
enable_agent_tool_creation: true,
sandbox_image: None,
sandbox_provider: Default::default(),
sandbox_provider: SandboxProvider::LocalProcess,
enable_networking: false,
model: "gpt-5.4".to_string(),
max_output_tokens: Some(512),
Expand Down
20 changes: 10 additions & 10 deletions crates/exoharness/src/basic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ impl BasicExoHarnessInner {
fn sandbox_backend_for_provider(
&self,
provider: SandboxProvider,
) -> Arc<dyn ManagedSandboxBackend> {
) -> Result<Arc<dyn ManagedSandboxBackend>> {
if matches!(
(self.sandbox_backend_choice, provider),
(
Expand All @@ -101,19 +101,20 @@ impl BasicExoHarnessInner {
SandboxBackendChoice::LocalProcess,
SandboxProvider::LocalProcess
)
| (_, SandboxProvider::Daytona)
) {
return Arc::clone(&self.sandbox_backend);
return Ok(Arc::clone(&self.sandbox_backend));
}

match provider {
Ok(match provider {
SandboxProvider::AppleContainer => {
Arc::new(CliContainerSandboxBackend::apple_container())
}
SandboxProvider::Docker => Arc::new(CliContainerSandboxBackend::docker()),
SandboxProvider::LocalProcess => Arc::new(LocalProcessSandboxBackend::new()),
SandboxProvider::Daytona => Arc::clone(&self.sandbox_backend),
}
SandboxProvider::Daytona => {
bail!("daytona sandbox provider is not supported by BasicExoHarness")
}
})
}
}

Expand Down Expand Up @@ -658,7 +659,7 @@ impl BasicConversationHandle {
let handle = self
.harness
.inner
.sandbox_backend_for_provider(sandbox.provider)
.sandbox_backend_for_provider(sandbox.provider)?
.acquire(sandbox_request(self.record.id, sandbox_id, sandbox))
.await?;
self.harness
Expand Down Expand Up @@ -1032,7 +1033,7 @@ impl ConversationHandle for BasicConversationHandle {
let sandbox_handle = self
.harness
.inner
.sandbox_backend_for_provider(metadata.provider)
.sandbox_backend_for_provider(metadata.provider)?
.acquire(sandbox_request(self.record.id, &sandbox_id, &metadata))
.await?;
self.harness
Expand Down Expand Up @@ -1187,7 +1188,7 @@ impl ConversationHandle for BasicConversationHandle {
let sandbox_handle = self
.harness
.inner
.sandbox_backend_for_provider(sandbox.provider)
.sandbox_backend_for_provider(sandbox.provider)?
.acquire_from_snapshot(
sandbox_request(self.record.id, &request.id, &sandbox),
payload,
Expand Down Expand Up @@ -1878,7 +1879,6 @@ struct StoredArtifactMetadata {
#[derive(Debug, Clone, Serialize, Deserialize)]
struct StoredSandbox {
id: SandboxId,
#[serde(default)]
provider: SandboxProvider,
image: String,
default_workdir: Option<String>,
Expand Down
61 changes: 56 additions & 5 deletions crates/exoharness/src/basic_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ use crate::{
ManagedSandboxHandle, NewAgentRequest, NewConversationRequest, PutSecretRequest,
RunInSandboxRequest, SandboxCommand, SandboxCommandOutput, SandboxProcessEvent,
SandboxProcessEventQuery, SandboxProcessParts, SandboxProcessStatus, SandboxProcessStdin,
SandboxRequest, Secret, SnapshotPayload, StartSandboxProcessRequest, WaitSandboxProcessRequest,
WriteArtifactRequest, WriteSandboxProcessInputRequest,
SandboxProvider, SandboxRequest, Secret, SnapshotPayload, StartSandboxProcessRequest,
WaitSandboxProcessRequest, WriteArtifactRequest, WriteSandboxProcessInputRequest,
};

#[tokio::test(flavor = "current_thread")]
Expand Down Expand Up @@ -511,7 +511,7 @@ async fn basic_backend_runs_commands_in_created_sandbox() {

let sandbox_id = conversation
.create_sandbox(CreateSandboxRequest {
provider: Default::default(),
provider: SandboxProvider::LocalProcess,
image: "basic-local-process".to_string(),
default_workdir: Some(tempdir.path().display().to_string()),
file_system_mounts: None,
Expand Down Expand Up @@ -569,7 +569,7 @@ async fn basic_backend_exposes_process_events_and_input() {
.expect("conversation");
let sandbox_id = conversation
.create_sandbox(CreateSandboxRequest {
provider: Default::default(),
provider: SandboxProvider::LocalProcess,
image: "basic-local-process".to_string(),
default_workdir: Some(tempdir.path().display().to_string()),
file_system_mounts: None,
Expand Down Expand Up @@ -818,7 +818,7 @@ async fn test_conversation(harness: &BasicExoHarness) -> Arc<dyn crate::Conversa
async fn test_sandbox(conversation: &Arc<dyn crate::ConversationHandle>) -> String {
conversation
.create_sandbox(CreateSandboxRequest {
provider: Default::default(),
provider: SandboxProvider::LocalProcess,
image: "test-sandbox".to_string(),
default_workdir: Some("/".to_string()),
file_system_mounts: None,
Expand All @@ -829,6 +829,57 @@ async fn test_sandbox(conversation: &Arc<dyn crate::ConversationHandle>) -> Stri
.expect("sandbox should be created")
}

/// Deserializing a `CreateSandboxRequest` from JSON that has no `provider` key must fail,
/// and the resulting error message must name the missing `provider` field.
#[test]
fn create_sandbox_request_requires_provider() {
    // Every field except `provider` is supplied, so the only possible complaint is about it.
    let payload = serde_json::json!({
        "image": "test-sandbox",
        "default_workdir": "/",
        "file_system_mounts": null,
        "enable_networking": true,
        "idle_seconds": 60,
    });

    let message = serde_json::from_value::<CreateSandboxRequest>(payload)
        .expect_err("provider should be required")
        .to_string();

    assert!(message.contains("missing field `provider`"));
}

/// Asking a `BasicExoHarness` conversation to create a sandbox with
/// `SandboxProvider::Daytona` must return an error whose message states that the
/// daytona provider is not supported.
#[tokio::test(flavor = "current_thread")]
async fn basic_backend_rejects_daytona_provider() {
    // Keep the temp directory binding alive for the whole test.
    let tempdir = TempDir::new().expect("tempdir");
    let config = local_test_config(tempdir.path());
    let harness = BasicExoHarness::new(config)
        .await
        .expect("harness should initialize");

    let agent_request = NewAgentRequest {
        slug: "agent".to_string(),
        name: "Agent".to_string(),
    };
    let agent = harness.new_agent(agent_request).await.expect("agent");
    let conversation = agent
        .new_conversation(NewConversationRequest::default())
        .await
        .expect("conversation");

    let daytona_request = CreateSandboxRequest {
        provider: SandboxProvider::Daytona,
        image: "test-sandbox".to_string(),
        default_workdir: Some("/".to_string()),
        file_system_mounts: None,
        enable_networking: Some(true),
        idle_seconds: Some(60),
    };
    let message = conversation
        .create_sandbox(daytona_request)
        .await
        .expect_err("daytona should not be handled by BasicExoHarness")
        .to_string();

    assert!(message.contains("daytona sandbox provider is not supported"));
}

struct TestSandboxBackend {
process: Arc<AsyncMutex<Option<TestProcessSpec>>>,
}
Expand Down
Loading