Skip to content
186 changes: 185 additions & 1 deletion crates/cli/src/tui.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ use std::time::Duration;

use executor::{
ConversationHandle, EventData, EventId, EventQuery, EventQueryDirection, ExecutionStreamEvent,
HarnessConversation, SendRequest, SessionId,
HarnessConversation, SandboxId, SendRequest, SessionId, SnapshotId, SnapshotMode,
StartSandboxRequest,
};
use lingua::universal::{UserContent, UserContentPart};
use lingua::{Message, UniversalStreamChunk};
Expand Down Expand Up @@ -382,6 +383,57 @@ impl ChatRepl {
self.print_transcript().await?;
continue;
}
if trimmed == "/help" {
print_help();
continue;
}
if trimmed == "/snapshot" {
match self
.snapshot_current_sandbox(SnapshotMode::Filesystem)
.await
{
Ok(snapshot_id) => println!("snapshot {snapshot_id}"),
Err(error) => println!("snapshot failed: {error:#}"),
}
continue;
}
if trimmed == "/checkpoint" {
match self
.snapshot_current_sandbox(SnapshotMode::FullState)
.await
{
Ok(snapshot_id) => println!("checkpoint {snapshot_id}"),
Err(error) => println!("checkpoint failed: {error:#}"),
}
continue;
}
if trimmed == "/snapshots" {
match self.list_snapshots().await {
Ok(snapshots) if snapshots.is_empty() => {
println!("no snapshots yet for this conversation");
}
Ok(snapshots) => {
println!("SNAPSHOT\tSANDBOX");
for (snapshot_id, sandbox_id) in snapshots {
println!("{snapshot_id}\t{sandbox_id}");
}
}
Err(error) => println!("listing snapshots failed: {error:#}"),
}
continue;
}
if let Some(arg) = trimmed.strip_prefix("/rewind ") {
let snapshot_id_str = arg.trim();
if snapshot_id_str.is_empty() {
println!("usage: /rewind <snapshot-id>");
continue;
}
match self.rewind_to_snapshot(snapshot_id_str).await {
Ok(()) => println!("rewound to snapshot {snapshot_id_str}"),
Err(error) => println!("rewind failed: {error:#}"),
}
continue;
}

self.editor.add_history_entry(line.as_str())?;
self.send(trimmed).await?;
Expand All @@ -405,6 +457,49 @@ impl ChatRepl {
Ok(())
}

/// Find the most recent `SandboxCreated` event and snapshot that sandbox.
/// The conversation's active sandbox must have been created (or started)
/// in *this* REPL session — running_sandboxes is a per-process map.
async fn snapshot_current_sandbox(
&self,
mode: SnapshotMode,
) -> Result<SnapshotId, Box<dyn Error>> {
let sandbox_id = latest_sandbox_id(self.conversation.as_ref())
.await?
.ok_or("no sandbox has been created in this conversation yet")?;
let id = self
.conversation
.exoharness_handle()
.snapshot_sandbox(sandbox_id, mode)
.await?;
Ok(id)
}

async fn list_snapshots(&self) -> Result<Vec<(SnapshotId, SandboxId)>, Box<dyn Error>> {
list_snapshots(self.conversation.as_ref()).await
}

/// Restore the conversation's sandbox to a previously-taken snapshot.
/// Stops the current container, decodes the snapshot payload, and starts
/// a fresh container from that state.
async fn rewind_to_snapshot(&self, snapshot_id_str: &str) -> Result<(), Box<dyn Error>> {
let snapshot_id = snapshot_id_str
.parse::<SnapshotId>()
.map_err(|error| format!("invalid snapshot id `{snapshot_id_str}`: {error}"))?;
let sandbox_id = sandbox_id_for_snapshot(self.conversation.as_ref(), snapshot_id)
.await?
.ok_or_else(|| format!("snapshot {snapshot_id} not found in this conversation"))?;
self.conversation
.exoharness_handle()
.start_sandbox(StartSandboxRequest {
id: sandbox_id,
snapshot_id,
idle_seconds: None,
})
.await?;
Ok(())
}

async fn send(&mut self, input: &str) -> Result<(), Box<dyn Error>> {
let mut stream = self
.conversation
Expand Down Expand Up @@ -561,6 +656,95 @@ fn render_user_content_for_history(content: &UserContent) -> String {
}
}

fn print_help() {
println!("repl commands:");
println!(" /quit | /exit exit the repl");
println!(" /history reprint the conversation transcript");
println!(" /snapshot capture filesystem-only snapshot of the sandbox");
println!(" /checkpoint capture full-state snapshot (filesystem +");
println!(" running processes + memory); requires CRIU");
println!(" + docker experimental on the host");
println!(" /snapshots list snapshots taken in this conversation");
println!(" /rewind <id> restore the sandbox to a previous snapshot");
println!(" or checkpoint");
println!(" /help show this message");
}

/// Walk the conversation's event log to find the latest `SandboxCreated`
/// event, returning the sandbox id. Returns `None` if no sandbox has been
/// created yet (e.g. nothing has been chatted with).
async fn latest_sandbox_id(
conversation: &dyn HarnessConversation,
) -> Result<Option<SandboxId>, Box<dyn Error>> {
let result = conversation
.exoharness_handle()
.get_events(Some(EventQuery {
cursor: None,
direction: Some(EventQueryDirection::Desc),
limit: Some(50),
session_id: None,
turn_id: None,
types: Some(vec!["sandbox_created".to_string()]),
}))
.await?;
for event in result.events {
if let EventData::SandboxCreated { sandbox_id, .. } = event.data {
return Ok(Some(sandbox_id));
}
}
Ok(None)
}

/// All snapshots taken in the conversation, oldest-first. Each tuple is
/// `(snapshot_id, sandbox_id_it_was_taken_from)`.
async fn list_snapshots(
conversation: &dyn HarnessConversation,
) -> Result<Vec<(SnapshotId, SandboxId)>, Box<dyn Error>> {
let mut out = Vec::new();
let mut cursor: Option<EventId> = None;
loop {
let result = conversation
.exoharness_handle()
.get_events(Some(EventQuery {
cursor,
direction: Some(EventQueryDirection::Asc),
limit: Some(100),
session_id: None,
turn_id: None,
types: Some(vec!["sandbox_snapshotted".to_string()]),
}))
.await?;
let events_empty = result.events.is_empty();
for event in result.events {
if let EventData::SandboxSnapshotted {
sandbox_id,
snapshot_id,
} = event.data
{
out.push((snapshot_id, sandbox_id));
}
}
if events_empty || result.cursor.is_none() {
break;
}
cursor = result.cursor;
}
Ok(out)
}

/// Find the sandbox a particular snapshot was taken from, by scanning the
/// `SandboxSnapshotted` events.
async fn sandbox_id_for_snapshot(
conversation: &dyn HarnessConversation,
target: SnapshotId,
) -> Result<Option<SandboxId>, Box<dyn Error>> {
let snapshots = list_snapshots(conversation).await?;
Ok(snapshots
.into_iter()
.find(|(snapshot_id, _)| *snapshot_id == target)
.map(|(_, sandbox_id)| sandbox_id))
}

#[cfg(test)]
mod tests {
use super::{render_tool_call, render_tool_result, render_user_content_for_history};
Expand Down
6 changes: 5 additions & 1 deletion crates/executor/src/basic_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -787,7 +787,11 @@ impl ConversationHandle for FakeConversationHandle {
Err(anyhow!("not implemented"))
}

async fn snapshot_sandbox(&self, _id: SandboxId) -> Result<SnapshotId> {
async fn snapshot_sandbox(
&self,
_id: SandboxId,
_mode: exoharness::SnapshotMode,
) -> Result<SnapshotId> {
Err(anyhow!("not implemented"))
}

Expand Down
4 changes: 2 additions & 2 deletions crates/executor/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ pub use exoharness::{
AgentHandle, BasicExoHarness, BasicExoHarnessConfig, Binding, BindingMetadata,
ConversationHandle, EventData, EventId, EventQuery, EventQueryDirection, ExoHarness,
FileSystemMount, FileSystemMountMode, ForkConversationRequest, PutSecretRequest,
SANDBOX_MAIN_MOUNT_DIR, SandboxBackendChoice, Secret, SecretBackendChoice, SecretMetadata,
SessionId, Uuid7,
SANDBOX_MAIN_MOUNT_DIR, SandboxBackendChoice, SandboxId, Secret, SecretBackendChoice,
SecretMetadata, SessionId, SnapshotId, SnapshotMode, StartSandboxRequest, Uuid7,
};
pub use harness_basic::BasicHarness;
pub use harness_config::load_agent_config;
Expand Down
2 changes: 1 addition & 1 deletion crates/exoharness/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ lingua.workspace = true
object_store = { workspace = true, optional = true }
serde.workspace = true
serde_json.workspace = true
tempfile = "3.23.0"
thiserror.workspace = true
tokio = { version = "1.52.1", default-features = false, features = [
"fs",
Expand All @@ -44,7 +45,6 @@ tokio-util = { workspace = true, optional = true, features = ["compat"] }
uuid = { workspace = true, features = ["serde", "v4", "v7", "js"] }

[dev-dependencies]
tempfile = "3.23.0"
tokio = { version = "1.52.1", features = [
"fs",
"macros",
Expand Down
Loading