Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/ares/containers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@
from ares.containers.containers import Container
from ares.containers.containers import ContainerFactory
from ares.containers.containers import Resources
from ares.containers.containers import SnapshotableContainer
from ares.containers.daytona import DaytonaContainer

# Names exported as the public API of the ares.containers package.
__all__ = [
    "Container",
    "ContainerFactory",
    "DaytonaContainer",
    "Resources",
    "SnapshotableContainer",
]
60 changes: 60 additions & 0 deletions src/ares/containers/containers.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,66 @@ def stop_and_remove(self) -> None:
"""


class SnapshotableContainer(Container, Protocol):
    """Protocol for containers whose filesystem state can be snapshotted.

    Extends the Container protocol with methods to capture the filesystem
    state, create a new container from a captured snapshot, and delete
    snapshots. This enables algorithms such as Go-Explore that need to return
    to previously visited environment states.
    """

    @abc.abstractmethod
    async def snapshot(self) -> str:
        """Capture the container's current filesystem state.

        Creates a snapshot of all filesystem changes since the container was
        started. Running processes are NOT captured -- only filesystem state.

        Returns:
            A snapshot ID string that can be passed to from_snapshot() to
            create a new container with this filesystem state, or to
            delete_snapshot() / delete_snapshot_sync() to delete it.
        """

    @classmethod
    @abc.abstractmethod
    def from_snapshot(
        cls,
        snapshot_id: str,
        *,
        name: str | None = None,
        resources: Resources | None = None,
        default_workdir: str | None = None,
    ) -> "SnapshotableContainer":
        """Create a new (unstarted) container from a previously captured snapshot.

        This is a synchronous alternate constructor; all arguments other than
        snapshot_id are keyword-only.

        Args:
            snapshot_id: A snapshot ID previously returned by snapshot().
            name: Optional name for the container.
            resources: Optional resource constraints.
            default_workdir: Optional default working directory for commands.

        Returns:
            A new SnapshotableContainer instance (not yet started).
        """
        ...

    @abc.abstractmethod
    async def delete_snapshot(self, snapshot_id: str) -> None:
        """Delete a previously captured snapshot, freeing its resources.

        Args:
            snapshot_id: The snapshot ID to delete, as returned by snapshot().
        """

    @abc.abstractmethod
    def delete_snapshot_sync(self, snapshot_id: str) -> None:
        """Synchronous version of delete_snapshot for atexit cleanup.

        Args:
            snapshot_id: The snapshot ID to delete, as returned by snapshot().
        """


class ContainerFactory(Protocol):
"""Protocol for creating containers from images or Dockerfiles.

Expand Down
47 changes: 46 additions & 1 deletion src/ares/containers/docker.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
"""An interface for interacting with local Docker containers."""

import asyncio
import contextlib
import dataclasses
import functools
import io
import logging
import pathlib
import tarfile
from typing import cast
import uuid

import docker
import docker.errors
Expand All @@ -15,6 +18,8 @@

from ares.containers import containers

_LOGGER = logging.getLogger(__name__)


def _make_docker_client() -> docker.DockerClient:
try:
Expand All @@ -24,7 +29,7 @@ def _make_docker_client() -> docker.DockerClient:


@dataclasses.dataclass(kw_only=True)
class DockerContainer(containers.Container):
class DockerContainer(containers.SnapshotableContainer):
image: str | None = None
dockerfile_path: pathlib.Path | str | None = None
name: str | None = None
Expand Down Expand Up @@ -183,6 +188,46 @@ async def download_files(self, remote_paths: list[str], local_paths: list[pathli
with open(local_path, "wb") as f:
f.write(file_data.read())

async def snapshot(self) -> str:
"""Commit current container state as a Docker image."""
if self._container is None:
raise RuntimeError("Container not started, snapshot is not possible.")
Comment on lines +191 to +194
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

snapshot() raises raw RuntimeError when _container is None, violating the repo's error-handling guideline (CLAUDE.md) and preventing callers from distinguishing container lifecycle failures. Can we introduce or reuse a container-specific exception (e.g. ContainerNotStartedError) that derives from the repo exception hierarchy?

Finding type: AI Coding Guidelines | Severity: 🟢 Low


Want Baz to fix this for you? Activate Fixer

Other fix methods

Fix in Cursor

Prompt for AI Agents:

In src/ares/containers/docker.py around lines 191-194, the snapshot() method currently
raises a raw RuntimeError when self._container is None. Replace this with a
repo-specific container lifecycle exception (for example ContainerNotStartedError) that
derives from the project's error hierarchy described in CLAUDE.md. Add an import for
that exception at the top of the file (or create it in the appropriate exceptions module
if it doesn't exist), change the raise site to raise ContainerNotStartedError with a
clear message, and update any other lifecycle methods in this file that currently raise
RuntimeError to raise the same container-specific exception for consistency.


tag = f"ares-snapshot-{uuid.uuid4().hex[:12]}"
image = await asyncio.to_thread(
self._container.commit,
repository="ares-go-explore",
tag=tag,
conf={"Labels": {"ares-go-explore": "true"}},
)
_LOGGER.info("Snapshot created: %s (tag: %s)", image.id, tag)
return image.id

async def delete_snapshot(self, snapshot_id: str) -> None:
    """Remove the Docker image identified by ``snapshot_id``.

    The blocking ``images.remove`` call runs in a worker thread. An image
    that no longer exists is not an error; it is only logged at debug level.

    Args:
        snapshot_id: Image ID previously returned by snapshot().
    """
    try:
        await asyncio.to_thread(self._client.images.remove, snapshot_id)
    except docker.errors.ImageNotFound:
        _LOGGER.debug("Snapshot %s already deleted.", snapshot_id)
    else:
        # Only reached when the removal itself succeeded.
        _LOGGER.info("Snapshot deleted: %s", snapshot_id)

def delete_snapshot_sync(self, snapshot_id: str) -> None:
    """Blocking counterpart of delete_snapshot(), intended for atexit cleanup.

    An image that no longer exists is silently ignored.

    Args:
        snapshot_id: Image ID previously returned by snapshot().
    """
    remove_image = self._client.images.remove
    with contextlib.suppress(docker.errors.ImageNotFound):
        remove_image(snapshot_id)
Comment on lines +196 to +217
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

cat -n src/ares/containers/docker.py | sed -n '190,225p'

Repository: withmartian/ares

Length of output: 1691


🏁 Script executed:

# Get more context around the snapshot-related methods
cat -n src/ares/containers/docker.py | sed -n '170,230p'

Repository: withmartian/ares

Length of output: 2987


🏁 Script executed:

# Search for the snapshot method implementation
rg -n "def snapshot" src/ares/containers/docker.py -A 15

Repository: withmartian/ares

Length of output: 765


🏁 Script executed:

# Check if label validation exists anywhere
rg -n "ares-go-explore" src/ares/containers/docker.py

Repository: withmartian/ares

Length of output: 167


🏁 Script executed:

# Check the full class structure for context
rg -n "class DockerContainer" src/ares/containers/docker.py -A 50

Repository: withmartian/ares

Length of output: 2559


Use the snapshot label as a delete guard.

snapshot() marks ARES-created images with ares-go-explore=true, but both delete paths ignore that marker and will remove any image ID they are given. That makes a bad or stale snapshot_id capable of deleting an unrelated local image. Validate the label before images.remove() and keep the current not-found idempotence on top.

🛡️ Suggested guard
+    def _assert_snapshot_image(self, snapshot_id: str) -> None:
+        image = self._client.images.get(snapshot_id)
+        labels = image.attrs.get("Config", {}).get("Labels") or {}
+        if labels.get("ares-go-explore") != "true":
+            raise ValueError(f"{snapshot_id} is not an ARES snapshot")
+
     async def delete_snapshot(self, snapshot_id: str) -> None:
         """Delete a Docker image created by snapshot()."""
         try:
+            await asyncio.to_thread(self._assert_snapshot_image, snapshot_id)
             await asyncio.to_thread(self._client.images.remove, snapshot_id)
             _LOGGER.info("Snapshot deleted: %s", snapshot_id)
         except docker.errors.ImageNotFound:
             _LOGGER.debug("Snapshot %s already deleted.", snapshot_id)
 
     def delete_snapshot_sync(self, snapshot_id: str) -> None:
         """Synchronous version for atexit cleanup."""
         with contextlib.suppress(docker.errors.ImageNotFound):
+            self._assert_snapshot_image(snapshot_id)
             self._client.images.remove(snapshot_id)
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
tag = f"ares-snapshot-{uuid.uuid4().hex[:12]}"
image = await asyncio.to_thread(
self._container.commit,
repository="ares-go-explore",
tag=tag,
conf={"Labels": {"ares-go-explore": "true"}},
)
_LOGGER.info("Snapshot created: %s (tag: %s)", image.id, tag)
return image.id
async def delete_snapshot(self, snapshot_id: str) -> None:
"""Delete a Docker image created by snapshot()."""
try:
await asyncio.to_thread(self._client.images.remove, snapshot_id)
_LOGGER.info("Snapshot deleted: %s", snapshot_id)
except docker.errors.ImageNotFound:
_LOGGER.debug("Snapshot %s already deleted.", snapshot_id)
def delete_snapshot_sync(self, snapshot_id: str) -> None:
"""Synchronous version for atexit cleanup."""
with contextlib.suppress(docker.errors.ImageNotFound):
self._client.images.remove(snapshot_id)
tag = f"ares-snapshot-{uuid.uuid4().hex[:12]}"
image = await asyncio.to_thread(
self._container.commit,
repository="ares-go-explore",
tag=tag,
conf={"Labels": {"ares-go-explore": "true"}},
)
_LOGGER.info("Snapshot created: %s (tag: %s)", image.id, tag)
return image.id
def _assert_snapshot_image(self, snapshot_id: str) -> None:
image = self._client.images.get(snapshot_id)
labels = image.attrs.get("Config", {}).get("Labels") or {}
if labels.get("ares-go-explore") != "true":
raise ValueError(f"{snapshot_id} is not an ARES snapshot")
async def delete_snapshot(self, snapshot_id: str) -> None:
"""Delete a Docker image created by snapshot()."""
try:
await asyncio.to_thread(self._assert_snapshot_image, snapshot_id)
await asyncio.to_thread(self._client.images.remove, snapshot_id)
_LOGGER.info("Snapshot deleted: %s", snapshot_id)
except docker.errors.ImageNotFound:
_LOGGER.debug("Snapshot %s already deleted.", snapshot_id)
def delete_snapshot_sync(self, snapshot_id: str) -> None:
"""Synchronous version for atexit cleanup."""
with contextlib.suppress(docker.errors.ImageNotFound):
self._assert_snapshot_image(snapshot_id)
self._client.images.remove(snapshot_id)
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/ares/containers/docker.py` around lines 196 - 217, delete_snapshot and
delete_snapshot_sync remove any image ID given, so validate the ARES marker
label ("ares-go-explore" == "true") before calling images.remove to avoid
deleting unrelated images; in delete_snapshot use asyncio.to_thread to first
fetch the image metadata (e.g., self._client.images.get or inspect) and check
image.attrs['Config']['Labels'] (or image.labels) contains
"ares-go-explore":"true" and only then call images.remove (preserve the existing
ImageNotFound handling and _LOGGER.info/_LOGGER.debug behavior), and in
delete_snapshot_sync do the same synchronous inspect/get+label check before
self._client.images.remove, using
contextlib.suppress(docker.errors.ImageNotFound) to keep idempotence; if the
label is absent or not matching, log a warning/debug and skip removal.


@classmethod
def from_snapshot(
    cls,
    snapshot_id: str,
    *,
    name: str | None = None,
    resources: containers.Resources | None = None,
    default_workdir: str | None = None,
) -> "DockerContainer":
    """Create a container that uses a previously captured snapshot as its image.

    Args:
        snapshot_id: Image ID previously returned by snapshot(); passed as
            the new container's ``image``.
        name: Optional name for the container.
        resources: Optional resource constraints.
        default_workdir: Optional default working directory for commands.

    Returns:
        A new instance of ``cls`` constructed with ``image=snapshot_id``.
    """
    # Construct through ``cls`` rather than the hard-coded class name so that
    # calling this on a subclass yields an instance of that subclass.
    return cls(image=snapshot_id, name=name, resources=resources, default_workdir=default_workdir)

@classmethod
def from_image(
cls,
Expand Down
86 changes: 86 additions & 0 deletions src/ares/containers/docker_snapshot_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
"""Tests for Docker container snapshotting."""

import unittest.mock

import pytest

from ares.containers import docker


@pytest.fixture
def mock_docker_client():
    """Yield a MagicMock that stands in for the Docker client.

    While the fixture is active, ``docker._make_docker_client`` is patched so
    that calling it returns the yielded mock.
    """
    fake_client = unittest.mock.MagicMock()
    with unittest.mock.patch.object(docker, "_make_docker_client", return_value=fake_client):
        yield fake_client


@pytest.mark.asyncio
async def test_snapshot_creates_image(mock_docker_client):  # noqa: ARG001
    """snapshot() commits the underlying container and returns the image ID."""
    # Fake committed image and the fake inner container that produces it.
    committed_image = unittest.mock.MagicMock()
    committed_image.id = "sha256:abc123"
    running = unittest.mock.MagicMock()
    running.commit.return_value = committed_image

    container = docker.DockerContainer(image="test:latest")
    container._container = running

    result = await container.snapshot()

    assert result == "sha256:abc123"
    running.commit.assert_called_once()
    commit_kwargs = running.commit.call_args.kwargs
    assert commit_kwargs["repository"] == "ares-go-explore"
    assert commit_kwargs["conf"]["Labels"]["ares-go-explore"] == "true"


@pytest.mark.asyncio
async def test_snapshot_raises_if_not_started():
    """snapshot() on a container with no inner container raises RuntimeError."""
    unstarted = docker.DockerContainer(image="test:latest")

    with pytest.raises(RuntimeError, match="not started"):
        await unstarted.snapshot()


def test_from_snapshot_creates_container():
    """from_snapshot() returns a DockerContainer whose image is the snapshot ID."""
    restored = docker.DockerContainer.from_snapshot(
        "sha256:abc123", name="restored", default_workdir="/workspace"
    )

    assert isinstance(restored, docker.DockerContainer)
    observed = (restored.image, restored.name, restored.default_workdir)
    assert observed == ("sha256:abc123", "restored", "/workspace")


@pytest.mark.asyncio
async def test_delete_snapshot(mock_docker_client):
    """delete_snapshot() forwards the snapshot ID to the client's images.remove."""
    target_id = "sha256:abc123"
    subject = docker.DockerContainer(image="test:latest")

    await subject.delete_snapshot(target_id)

    mock_docker_client.images.remove.assert_called_once_with(target_id)


@pytest.mark.asyncio
async def test_delete_snapshot_ignores_not_found(mock_docker_client):
    """delete_snapshot() swallows ImageNotFound raised for an already-deleted image."""
    # Imported locally because the module-level name ``docker`` in this test
    # file is ares.containers.docker, not the Docker SDK package.
    import docker as docker_lib

    mock_docker_client.images.remove.side_effect = docker_lib.errors.ImageNotFound("not found")
    container = docker.DockerContainer(image="test:latest")

    # Should not raise.
    await container.delete_snapshot("sha256:abc123")

    # Confirm the removal was actually attempted; without this the test would
    # also pass if delete_snapshot() never called the client at all.
    mock_docker_client.images.remove.assert_called_once_with("sha256:abc123")


def test_delete_snapshot_sync(mock_docker_client):
    """delete_snapshot_sync() forwards the snapshot ID to the client's images.remove."""
    target_id = "sha256:abc123"
    subject = docker.DockerContainer(image="test:latest")

    subject.delete_snapshot_sync(target_id)

    mock_docker_client.images.remove.assert_called_once_with(target_id)
7 changes: 6 additions & 1 deletion src/ares/presets.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,14 +125,19 @@ def _register_default_presets() -> None:
This function is called automatically when the presets module is imported,
ensuring built-in presets are always available.
"""
seen: set[str] = set()
for ds_spec in code_env.list_harbor_datasets():
for code_agent_id, code_agent_factory in [
("mswea", mini_swe_agent.MiniSWECodeAgent),
("terminus2", terminus2_agent.Terminus2Agent),
]:
ds_id = _make_harbor_dataset_id(ds_spec.name, ds_spec.version)
preset_name = f"{ds_id}-{code_agent_id}"
if preset_name in seen:
continue
seen.add(preset_name)
registry.register_preset(
f"{ds_id}-{code_agent_id}",
preset_name,
HarborSpec(
ds_spec=ds_spec,
dataset_id=ds_id,
Expand Down
Loading