withmartian
diff --git a/‎README.md‎
Lines changed: 6 additions & 2 deletions b/‎README.md‎
Lines changed: 6 additions & 2 deletions
diff --git a/‎examples/03_state_snapshotting.py‎
Lines changed: 143 additions & 0 deletions b/‎examples/03_state_snapshotting.py‎
Lines changed: 143 additions & 0 deletions
diff --git a/‎pyproject.toml‎
Lines changed: 1 addition & 1 deletion b/‎pyproject.toml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/ares/code_agents/mini_swe_agent.py‎
Lines changed: 6 additions & 0 deletions b/‎src/ares/code_agents/mini_swe_agent.py‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎src/ares/containers/daytona.py‎
Lines changed: 3 additions & 1 deletion b/‎src/ares/containers/daytona.py‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎src/ares/containers/docker.py‎
Lines changed: 15 additions & 3 deletions b/‎src/ares/containers/docker.py‎
Lines changed: 15 additions & 3 deletions
@@ -1,4 +1,8 @@
-# ARES
+# ARES: Agentic Research & Evaluation Suite
+
+<p align="center">
+  <img margin="auto" width="auto" height="312" alt="image" src="https://github.com/user-attachments/assets/ae34ab36-b78f-48de-93c9-01d611a547e3" />
+</p>
 
 ARES (Agentic Research and Evaluation Suite) is an RL-first framework for training and evaluating agents.
 
@@ -133,4 +137,4 @@ if __name__ == "__main__":
 This example uses:
 - **Container backend:** Local Docker (change to `daytona.DaytonaContainer` for cloud)
 - **LLM backend:** Martian API (or any OpenAI-compatible API)
-- **Code agent:** MiniSWE agent from the mini-swe-agent library
+- **Code agent:** MiniSWE agent from the mini-swe-agent library
@@ -0,0 +1,143 @@
+"""Example demonstrating environment state snapshotting and restoration.
+
+This example shows how to:
+1. Create a snapshot after reset (at episode boundary)
+2. Save the snapshot to disk
+3. Restore an environment from a saved snapshot
+4. Continue execution from the restored state
+
+Example usage:
+
+    1. Make sure you have the examples dependencies installed
+       `uv sync --group examples`
+    2. Run the example
+       `uv run -m examples.03_state_snapshotting`
+"""
+
+import asyncio
+import pathlib
+import tempfile
+
+from ares.code_agents import mini_swe_agent
+from ares.containers import docker
+from ares.environments import snapshot
+from ares.environments import swebench_env
+from ares.llms import chat_completions_compatible
+
+
+async def main():  # End-to-end demo: export a snapshot, reload it, step the restored env.
+    # Create the LLM client; it is awaited on each observation to produce an action
+    agent = chat_completions_compatible.ChatCompletionCompatibleLLMClient(model="openai/gpt-4o-mini")
+
+    # Load SWE-bench tasks and keep only the first one for this demo
+    all_tasks = swebench_env.swebench_verified_tasks()
+    tasks = [all_tasks[0]]
+
+    print(f"Running on task: {tasks[0].instance_id}")
+    print(f"Repository: {tasks[0].repo}")
+    print("-" * 80)
+
+    # Create a temporary directory for snapshots (removed when the `with` block exits)
+    with tempfile.TemporaryDirectory() as snapshot_dir:
+        snapshot_path = pathlib.Path(snapshot_dir)
+
+        # === PART 1: Create and save a snapshot ===
+        print("\n[PART 1] Creating initial environment and snapshot...")
+
+        async with swebench_env.SweBenchEnv(
+            tasks=tasks,
+            code_agent_factory=mini_swe_agent.MiniSWECodeAgent,
+            container_factory=docker.DockerContainer,
+        ) as env:
+            # Reset the environment to get the first timestep
+            ts = await env.reset()
+            print(f"Environment reset complete. Step count: {env._step_count}")
+
+            # Take up to three steps before snapshotting (stops early if the episode ends)
+            for i in range(3):
+                action = await agent(ts.observation)
+                print(f"  Step {i}: Taking action...")
+                ts = await env.step(action)
+
+                if ts.last():
+                    print("  Episode completed early")
+                    break
+
+            print(f"Current step count: {env._step_count}")
+
+            # Force an episode boundary if the episode has not already ended.
+            # In practice, you'd snapshot after step() returns a final timestep
+            # or after reset() completes. This example does not wait for the agent;
+            # it cancels the in-flight agent task instead.
+            if not ts.last():
+                print("\n  Note: For snapshotting, we need to be at episode boundary.")
+                print("  Cancelling agent task to reach boundary...")
+                if env._code_agent_task and not env._code_agent_task.done():  # NOTE(review): reads private env state
+                    env._code_agent_task.cancel()
+                    import contextlib  # NOTE(review): function-scope import; could live with the top-level imports
+
+                    with contextlib.suppress(asyncio.CancelledError):  # awaiting the cancelled task raises CancelledError
+                        await env._code_agent_task
+
+            # Now we can export state (at episode boundary)
+            print("\n  Exporting state snapshot...")
+            snap = await env.export_state(snapshot_path, snapshot_id="example-snapshot")
+
+            print(f"  ✓ Snapshot created: {snap.snapshot_id}")
+            print(f"  ✓ Snapshot saved to: {snap.snapshot_dir}")
+            print(f"  ✓ Step count in snapshot: {snap.step_count}")
+            print(f"  ✓ Task type: {snap.task_type}")
+            print(f"  ✓ Container type: {snap.container_type}")
+
+        # === PART 2: Restore from snapshot ===
+        print("\n[PART 2] Restoring environment from snapshot...")
+
+        # Load snapshot metadata (presumably the snapshot.json written by export_state above -- verify)
+        snapshot_file = snapshot_path / "example-snapshot" / "snapshot.json"
+        loaded_snap = snapshot.EnvironmentSnapshot.load_from_file(snapshot_file)
+
+        print(f"  ✓ Loaded snapshot: {loaded_snap.snapshot_id}")
+        print(f"  ✓ Original step count: {loaded_snap.step_count}")
+
+        # Restore environment from snapshot
+        # Note: This creates a new environment instance with the saved state
+        restored_env = await swebench_env.SweBenchEnv.load_from_state(
+            loaded_snap,
+            container_factory=docker.DockerContainer,
+            code_agent_factory=mini_swe_agent.MiniSWECodeAgent,
+        )
+
+        print("  ✓ Environment restored")
+        print(f"  ✓ Restored step count: {restored_env._step_count}")
+        print(f"  ✓ Task: {restored_env._current_task.instance_id}")
+
+        # Use the restored environment in async context
+        async with restored_env:
+            print("\n[PART 3] Continuing from restored state...")
+
+            # NOTE(review): reset() starts a new episode on the restored env, so the
+            # steps below do not resume mid-episode -- confirm this is intended.
+            ts = await restored_env.reset()  # Reset to start a new episode
+            step_count = 0
+
+            # Take up to three more steps to demonstrate it works
+            while not ts.last() and step_count < 3:
+                action = await agent(ts.observation)
+                print(f"  Step {step_count}: Taking action from restored env...")
+                ts = await restored_env.step(action)
+                step_count += 1
+
+            print(f"\n  ✓ Completed {step_count} additional steps from restored state")
+
+        print("\n" + "=" * 80)
+        print("Snapshot example completed successfully!")
+        print("=" * 80)
+        print("\nKey takeaways:")
+        print("  1. Snapshots can only be taken at episode boundaries")
+        print("  2. Snapshots save: task state, container filesystem, agent messages")
+        print("  3. Restored environments can continue execution normally")
+        print("  4. Use cases: debugging, RL replay, mechanistic interpretability")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())  # Run the async example to completion when executed as a script
@@ -21,7 +21,7 @@ dependencies = [
     "daytona>=0.125.0",
     "docker>=7.1.0",
     "frozendict>=2.4.7",
-    "harbor>=0.1.25",
+    "harbor>=0.1.32",
     "httpx>=0.28.1",
     "jinja2>=3.1.6",
     "mini-swe-agent>=1.17.3",
 
@@ -1,3 +1,9 @@
+# Original code: https://github.com/SWE-agent/mini-swe-agent/blob/main/src/minisweagent/agents/default.py
+# Copyright (c) 2025 Kilian A. Lieret and Carlos E. Jimenez
+# Licensed under the MIT License.
+#
+# Modifications Copyright (c) 2026 Martian
+
 """A Code Agent wrapping the mini-swe-agent.
 
 Last checked: Nov 15, 2025
 
@@ -6,6 +6,7 @@
 import functools
 import logging
 import pathlib
+import shlex
 
 import daytona
 import daytona.common.errors
@@ -124,7 +125,8 @@ async def exec_run(
         # If we get a Timeout, it gets re-raised.
         result = await _exec_with_retry(
             self._sbx,
-            command,
+            # NOTE: Many code agents expect things like `.bashrc` to be loaded, so we use `bash -lc` here
+            f"bash -lc {shlex.quote(command)}",
             workdir=workdir,
             env=env,
             timeout_s=timeout_s,
 
@@ -27,15 +27,19 @@ def _client(self) -> docker.DockerClient:
     async def start(self, env: dict[str, str] | None = None) -> None:
         """Start the container."""
         if self.image is None:
-            assert self.dockerfile_path is not None, "Must specify one of image or dockerfile_path"
+            if self.dockerfile_path is None:
+                raise ValueError("Must specify one of image or dockerfile_path")
+
+            dockerfile_path = pathlib.Path(self.dockerfile_path)
             # TODO: Some kind of cache for dockerfile directory to avoid
             #       rebuilding same image over and over again?
             image_obj, _ = await asyncio.to_thread(
                 self._client.images.build,
-                path=self.dockerfile_path.parent,
+                path=dockerfile_path.parent.as_posix(),
                 tag=self.name,
             )
             self.image = image_obj.id
+            assert self.image is not None, f"Image ID is None for container {self.name}"
 
         self._container = await asyncio.to_thread(
             self._client.containers.run,
@@ -64,7 +68,9 @@ async def exec_run(
         result = await asyncio.wait_for(
             asyncio.to_thread(
                 self._container.exec_run,
-                ["sh", "-c", command],
+                # TODO: Consolidate this implementation into some container base class/helper fn
+                # NOTE: Many code agents expect things like `.bashrc` to be loaded, so we use `bash -lc` here
+                ["bash", "-lc", command],
                 workdir=workdir,
                 environment=env,
             ),
@@ -93,6 +99,12 @@ async def upload_files(self, local_paths: list[pathlib.Path], remote_paths: list
             # Determine the destination directory
             remote_dir = str(pathlib.Path(remote_path).parent)
 
+            # Create dirs if they don't exist (since otherwise Docker complains)
+            await asyncio.to_thread(
+                self._container.exec_run,
+                cmd=f"mkdir -p {remote_dir}",
+            )
+
             # Upload the tar archive to the container
             await asyncio.to_thread(
                 self._container.put_archive,