From 66900d46b5ee1881b4d9edf4dc2de78a867b62d5 Mon Sep 17 00:00:00 2001 From: ncrispino Date: Tue, 9 Jun 2026 21:29:13 -0700 Subject: [PATCH 1/7] feat(sandbox): add OS-level SRT execution sandboxing (command_line_execution_mode: srt) Adds Anthropic sandbox-runtime (srt) as a 3rd command_line_execution_mode alongside local/docker. Default-off; opt-in via config. OS-level filesystem + network isolation derived from the same PathPermissionManager policy as the app layer (defense in depth). - SrtManager derives per-agent settings (allowWrite/denyWrite/denyRead, deny-all network, built-in secret-store read-deny baseline) and wraps commands via 'srt --settings cfg sh -c <cmd>' (sh -c form required so srt does not consume the server's -- separator). - Command-line MCP + filesystem-tools MCP servers are OS-wrapped; npx/npm and the no-roots wrapper auto-skip (they need registry access and cache writes, both of which the sandbox blocks) and keep their app-layer protection. - Native-sandbox backends (codex --full-auto, claude_code) degrade srt->local via has_native_execution_sandbox() to avoid nested-sandbox hangs; stored config normalized so raw reads see local. - Subagents inherit parent command_line_srt_* settings (parity with Docker). - New command_line_srt_* params added to the single-source exclusion list; 'srt' added to the MCP executable allowlist. Example config + tests included. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- massgen/backend/_excluded_params.py | 4 + massgen/backend/base.py | 39 +++- massgen/backend/claude_code.py | 4 + massgen/backend/codex.py | 4 + .../tools/filesystem/sandbox/srt_sandbox.yaml | 59 +++++ .../_code_execution_server.py | 46 +++- .../filesystem_manager/_filesystem_manager.py | 92 ++++++++ massgen/filesystem_manager/_srt_manager.py | 214 ++++++++++++++++++ massgen/mcp_tools/security.py | 4 + massgen/subagent/manager.py | 17 ++ massgen/tests/test_srt_backend_degrade.py | 64 ++++++ .../tests/test_srt_filesystem_integration.py | 158 +++++++++++++ massgen/tests/test_srt_manager.py | 206 +++++++++++++++++ massgen/tests/test_subagent_manager.py | 33 +++ 14 files changed, 938 insertions(+), 6 deletions(-) create mode 100644 massgen/configs/tools/filesystem/sandbox/srt_sandbox.yaml create mode 100644 massgen/filesystem_manager/_srt_manager.py create mode 100644 massgen/tests/test_srt_backend_degrade.py create mode 100644 massgen/tests/test_srt_filesystem_integration.py create mode 100644 massgen/tests/test_srt_manager.py diff --git a/massgen/backend/_excluded_params.py b/massgen/backend/_excluded_params.py index 1913c76da..2261cc3b1 100644 --- a/massgen/backend/_excluded_params.py +++ b/massgen/backend/_excluded_params.py @@ -38,6 +38,10 @@ # Docker credential and package management (nested dicts) "command_line_docker_credentials", "command_line_docker_packages", + # SRT (OS-level sandbox-runtime) execution mode parameters + "command_line_srt_network_allowed_domains", + "command_line_srt_deny_read", + "command_line_srt_allow_unix_sockets", "exclude_file_operation_mcps", "use_mcpwrapped_for_tool_filtering", "use_no_roots_wrapper", diff --git a/massgen/backend/base.py b/massgen/backend/base.py index c42185a53..b9f2bb651 100644 --- a/massgen/backend/base.py +++ b/massgen/backend/base.py @@ -162,11 +162,28 @@ def __init__(self, api_key: str | None = None, **kwargs): if filesystem_support in (FilesystemSupport.MCP, 
FilesystemSupport.NATIVE): # Validate execution mode execution_mode = kwargs.get("command_line_execution_mode", "local") - if execution_mode not in ["local", "docker"]: + if execution_mode not in ["local", "docker", "srt"]: raise ValueError( - f"Invalid command_line_execution_mode: '{execution_mode}'. Must be 'local' or 'docker'.", + f"Invalid command_line_execution_mode: '{execution_mode}'. Must be 'local', 'docker', or 'srt'.", ) + # Backends with a native execution sandbox (codex, claude_code) must + # not be SRT-wrapped: SRT (Seatbelt/bubblewrap) would nest inside the + # backend's own sandbox and hang. Degrade srt -> local; the backend's + # native sandbox provides the isolation. (SRT targets backends without + # one — e.g. OpenAI/Claude/Gemini/Grok API backends via the MCP.) + if execution_mode == "srt" and self.has_native_execution_sandbox(): + logger.warning( + f"[{self.get_provider_name()}] command_line_execution_mode 'srt' ignored — this backend " + "has a native execution sandbox; using its own isolation (SRT would nest sandboxes). " + "Falling back to 'local' for MassGen's command-line MCP.", + ) + execution_mode = "local" + # Normalize the stored config too, so downstream RAW reads of + # command_line_execution_mode (e.g. claude_code disallowed-tools / + # system-prompt logic) see the effective 'local', not 'srt'. + kwargs["command_line_execution_mode"] = "local" + # Validate network mode network_mode = kwargs.get("command_line_docker_network_mode", "none") if network_mode not in ["none", "bridge", "host"]: @@ -193,6 +210,11 @@ def __init__(self, api_key: str | None = None, **kwargs): # Nested credential and package management "command_line_docker_credentials": kwargs.get("command_line_docker_credentials"), "command_line_docker_packages": kwargs.get("command_line_docker_packages"), + # SRT (OS-level sandbox-runtime) execution mode. Network defaults to + # deny-all; an allowlisted domain is an opt-in capability grant. 
+ "command_line_srt_network_allowed_domains": kwargs.get("command_line_srt_network_allowed_domains", []), + "command_line_srt_deny_read": kwargs.get("command_line_srt_deny_read", []), + "command_line_srt_allow_unix_sockets": kwargs.get("command_line_srt_allow_unix_sockets", []), "enable_audio_generation": kwargs.get("enable_audio_generation", False), "exclude_file_operation_mcps": kwargs.get("exclude_file_operation_mcps", False), "use_mcpwrapped_for_tool_filtering": kwargs.get("use_mcpwrapped_for_tool_filtering", False), @@ -663,6 +685,19 @@ def get_filesystem_support(self) -> FilesystemSupport: # Subclasses should override this method return FilesystemSupport.NONE + def has_native_execution_sandbox(self) -> bool: + """Whether this backend sandboxes its own command execution natively. + + Backends like Codex (`--full-auto` → Landlock/Seatbelt) and Claude Code + confine their own command execution. For these, MassGen's OS-level SRT + sandbox is both redundant and harmful: SRT also uses Seatbelt/bubblewrap, + and the backend spawns its MCP servers inside its own sandbox, so an + SRT-wrapped command nests sandboxes and hangs. When True, MassGen degrades + `command_line_execution_mode: srt` to `local` for this backend (its native + sandbox provides the isolation). SRT is for backends WITHOUT one. + """ + return False + def get_supported_builtin_tools(self) -> list[str]: """Get list of builtin tools supported by this provider.""" return [] diff --git a/massgen/backend/claude_code.py b/massgen/backend/claude_code.py index 69cd50670..45eabb683 100644 --- a/massgen/backend/claude_code.py +++ b/massgen/backend/claude_code.py @@ -351,6 +351,10 @@ def get_filesystem_support(self) -> FilesystemSupport: """ return FilesystemSupport.NATIVE + def has_native_execution_sandbox(self) -> bool: + """Claude Code confines its own execution via its OS-level sandbox.""" + return True + def is_stateful(self) -> bool: """ Claude Code backend is stateful - maintains conversation context. 
diff --git a/massgen/backend/codex.py b/massgen/backend/codex.py index e7891a05d..823bc1347 100644 --- a/massgen/backend/codex.py +++ b/massgen/backend/codex.py @@ -2566,6 +2566,10 @@ def get_filesystem_support(self) -> FilesystemSupport: """Codex has native filesystem support via built-in tools.""" return FilesystemSupport.NATIVE + def has_native_execution_sandbox(self) -> bool: + """Codex confines its own execution via `--full-auto` (Landlock/Seatbelt).""" + return True + def is_stateful(self) -> bool: """Codex maintains session state via session files.""" return True diff --git a/massgen/configs/tools/filesystem/sandbox/srt_sandbox.yaml b/massgen/configs/tools/filesystem/sandbox/srt_sandbox.yaml new file mode 100644 index 000000000..12765746f --- /dev/null +++ b/massgen/configs/tools/filesystem/sandbox/srt_sandbox.yaml @@ -0,0 +1,59 @@ +# SRT Sandboxing — OS-level command execution sandbox +# +# Enables Anthropic's sandbox-runtime (SRT: bubblewrap on Linux, Seatbelt on +# macOS) for an agent's command/code execution. SRT enforces BOTH filesystem and +# network isolation at the OS level, derived from the SAME path-permission policy +# as MassGen's application-level permission layer (defense in depth). +# +# This is the right sandbox for backends that run commands through MassGen's +# command-line MCP (OpenAI/Claude/Gemini/Grok API backends). Native-sandbox +# backends (codex `--full-auto`, claude_code) use their own and don't need this. +# +# Prerequisite (one-time): +# npm install -g @anthropic-ai/sandbox-runtime +# +# Turn it on with ONE knob: `command_line_execution_mode: srt`. +# +# Run with: +# uv run massgen --automation --config massgen/configs/tools/filesystem/sandbox/srt_sandbox.yaml \ +# "Create out.txt in the workspace, then try to read ~/.ssh/id_rsa" +# Expected: the workspace write succeeds; reading a secret store like ~/.ssh and +# all network egress are denied by the sandbox. 
(SRT allows general filesystem +# READS by default so commands can run; we deny the well-known secret locations — +# ~/.ssh, ~/.aws, ~/.gnupg, cloud creds, etc. — and all WRITES outside the +# workspace. Add more read-denied paths via command_line_srt_deny_read.) + +agents: + - id: "sandboxed" + backend: + type: "openai" + model: "gpt-5" + + cwd: "workspace_sandboxed" + enable_mcp_command_line: true + + # ▼▼▼ The single opt-in knob ▼▼▼ + command_line_execution_mode: "srt" + + # ── Optional SRT knobs (all default-safe) ────────────────────────────── + # Network is DENY-ALL by default. Each allowlisted domain is a capability + # grant (an allowlisted domain can be an exfiltration channel), so add only + # what the sandboxed commands genuinely need: + # command_line_srt_network_allowed_domains: + # - "pypi.org" + # - "files.pythonhosted.org" + # + # Extra paths to block from reading (on top of the agent's protected paths): + # command_line_srt_deny_read: + # - "~/.aws" + # + # Unix sockets to allow (advanced; e.g. 
a local service): + # command_line_srt_allow_unix_sockets: [] + +orchestrator: + snapshot_storage: "snapshots" + agent_temporary_workspace: "temp_workspaces" + +ui: + display_type: "rich_terminal" + logging_enabled: true diff --git a/massgen/filesystem_manager/_code_execution_server.py b/massgen/filesystem_manager/_code_execution_server.py index 9e56e80b6..ead80530d 100644 --- a/massgen/filesystem_manager/_code_execution_server.py +++ b/massgen/filesystem_manager/_code_execution_server.py @@ -255,8 +255,14 @@ async def create_server() -> fastmcp.FastMCP: "--execution-mode", type=str, default="local", - choices=["local", "docker"], - help="Execution mode: local (subprocess) or docker (container isolation)", + choices=["local", "docker", "srt"], + help="Execution mode: local (subprocess), docker (container isolation), or srt (OS-level sandbox-runtime)", + ) + parser.add_argument( + "--srt-settings", + type=str, + default=None, + help="Path to the SRT settings JSON file (required for srt mode)", ) parser.add_argument( "--agent-id", @@ -294,6 +300,7 @@ async def create_server() -> fastmcp.FastMCP: mcp.allowed_commands = args.allowed_commands # Whitelist patterns mcp.blocked_commands = args.blocked_commands # Blacklist patterns mcp.execution_mode = args.execution_mode + mcp.srt_settings_path = args.srt_settings mcp.agent_id = args.agent_id mcp.instance_id = args.instance_id mcp.enable_sudo = args.enable_sudo @@ -334,6 +341,26 @@ async def create_server() -> fastmcp.FastMCP: pass raise RuntimeError(f"Failed to connect to Docker: {e}") + # Validate SRT mode at startup (parallel to Docker validation above). + if args.execution_mode == "srt": + import platform + + from massgen.filesystem_manager._srt_manager import srt_available + + if platform.system() == "Windows": + raise RuntimeError( + "SRT sandboxing (command_line_execution_mode: srt) is not supported on Windows. 
" "Use 'docker' mode or run under Linux/macOS.", + ) + if not args.srt_settings: + raise RuntimeError("SRT mode requires --srt-settings . This should be configured by the orchestrator.") + if not Path(args.srt_settings).exists(): + raise RuntimeError(f"SRT settings file not found: {args.srt_settings}") + if not srt_available(): + raise RuntimeError( + "SRT mode requested but the 'srt' CLI was not found on PATH. " "Install it with: npm install -g @anthropic-ai/sandbox-runtime", + ) + print(f"[SRT] Sandbox-runtime enabled (settings: {args.srt_settings})") + @mcp.tool() def execute_command( command: str, @@ -585,16 +612,27 @@ def run_docker_exec(): } else: - # Local mode: execute using subprocess (existing logic) + # Local or SRT mode: execute using subprocess (existing logic). # Prepare environment (auto-detects .venv in work_dir and sets up skills) env = _prepare_environment(work_path, mcp.local_skills_directory) + # SRT mode wraps the command in an OS-level sandbox. The original + # command is passed to `sh -c` INSIDE the sandbox so shell features + # (pipes, redirection) and any spawned subprocesses are contained. 
+ effective_command = command + if mcp.execution_mode == "srt": + from massgen.filesystem_manager._srt_manager import ( + wrap_command_with_srt, + ) + + effective_command = wrap_command_with_srt(command, mcp.srt_settings_path) + # Execute command start_time = time.time() try: result = subprocess.run( - command, + effective_command, shell=True, cwd=str(work_path), timeout=timeout, diff --git a/massgen/filesystem_manager/_filesystem_manager.py b/massgen/filesystem_manager/_filesystem_manager.py index 485ed93fe..8b4116faf 100644 --- a/massgen/filesystem_manager/_filesystem_manager.py +++ b/massgen/filesystem_manager/_filesystem_manager.py @@ -171,6 +171,9 @@ def __init__( command_line_docker_enable_sudo: bool = False, command_line_docker_credentials: dict[str, Any] | None = None, command_line_docker_packages: dict[str, Any] | None = None, + command_line_srt_network_allowed_domains: list[str] | None = None, + command_line_srt_deny_read: list[str] | None = None, + command_line_srt_allow_unix_sockets: list[str] | None = None, enable_audio_generation: bool = False, enable_file_generation: bool = False, exclude_file_operation_mcps: bool = False, @@ -307,6 +310,9 @@ def __init__( self.command_line_docker_enable_sudo = command_line_docker_enable_sudo self.command_line_docker_credentials = command_line_docker_credentials self.command_line_docker_packages = command_line_docker_packages + self.command_line_srt_network_allowed_domains = command_line_srt_network_allowed_domains or [] + self.command_line_srt_deny_read = command_line_srt_deny_read or [] + self.command_line_srt_allow_unix_sockets = command_line_srt_allow_unix_sockets or [] # Initialize Docker manager if Docker mode enabled self.docker_manager = None @@ -353,6 +359,25 @@ def __init__( if context_paths: self.path_permission_manager.add_context_paths(context_paths) + # Initialize SRT (OS-level sandbox-runtime) manager if SRT mode enabled. 
+ # Reads managed_paths lazily at settings-build time, so it's safe to + # construct here before workspace/temp paths are added below. Settings + # live OUTSIDE the sandbox's writable set so the agent can't tamper with + # its own policy. + self.srt_manager = None + if enable_mcp_command_line and self.command_line_execution_mode == "srt": + import tempfile + + from ._srt_manager import SrtManager + + self.srt_manager = SrtManager( + self.path_permission_manager, + network_allowed_domains=self.command_line_srt_network_allowed_domains, + extra_deny_read=self.command_line_srt_deny_read, + allow_unix_sockets=self.command_line_srt_allow_unix_sockets, + settings_dir=Path(tempfile.gettempdir()) / "massgen_srt", + ) + # Set agent_temporary_workspace_parent first, before calling _setup_workspace self.agent_temporary_workspace_parent = agent_temporary_workspace_parent @@ -1591,6 +1616,12 @@ def get_mcp_filesystem_config( # Normal mode: Exclude read_media_file since we have our own implementation config["exclude_tools"] = ["read_media_file"] + # Defense in depth: OS-sandbox the filesystem (write_file/edit_file) server + # too. Self-skips npx/npm launchers (they need registry + ~/.npm writes the + # sandbox blocks) — those keep the npm server's own --allowed-paths + the hook. + if self.command_line_execution_mode == "srt" and self.srt_manager: + config = self._wrap_stdio_config_with_srt(config) + return config def get_workspace_tools_mcp_config(self, backend_type: str | None = None) -> dict[str, Any]: @@ -1654,6 +1685,61 @@ def get_workspace_tools_mcp_config(self, backend_type: str | None = None) -> dic ], ) + # Defense in depth: OS-sandbox the filesystem-tools SERVER itself so a + # flawed/injected/permission-hook-bypassing file op is OS-denied, not just + # app-denied. (Command execution is already OS-sandboxed by the command_line + # srt mode; this closes the same hole for the file-manipulation MCP tools.) 
+ if self.command_line_execution_mode == "srt" and self.srt_manager: + config = self._wrap_stdio_config_with_srt(config) + + return config + + def _wrap_stdio_config_with_srt(self, config: dict[str, Any]) -> dict[str, Any]: + """Wrap a stdio MCP server launch with `srt` using the fs_tools profile. + + Transforms ``{command: "fastmcp", args: ["run", srv, "--", "--allowed-paths", …]}`` + into ``{command: "srt", args: ["--settings", , "sh", "-c", ""]}``. + + CRITICAL: the original command line (including its ``--`` option separator) + is passed via ``sh -c`` so that `srt`'s own argv parser cannot consume the + ``--`` (which would strip the server's ``--allowed-paths`` and crash it). + Verified: the direct-argv form breaks the MCP handshake; the sh -c form works. + """ + import shlex + + # Skip launchers that need network + cache writes at STARTUP. `npx`/`npm` + # fetch the package from registry.npmjs.org and write to ~/.npm/_cacache — + # both blocked by the tight sandbox (E403 / EPERM), so wrapping them crashes + # the server. Also skip the no-roots filesystem wrapper, which is launched as + # `python3 filesystem_no_roots.py …` but INTERNALLY spawns `npx …` (so it has + # the same network/cache need). These servers keep their app-layer protections + # (each server's own --allowed-paths + PathPermissionManager). For full OS-level + # coverage, install the server as a global binary (no npx). + command = config.get("command", "") + args = config.get("args", []) + # Token-based detection (not a fragile substring): any whole token == npx/npm, + # or the launched script is the npx-spawning no-roots wrapper. + tokens = [str(command), *[str(a) for a in args]] + needs_network = any(t in ("npx", "npm") or t.endswith("/npx") or t.endswith("/npm") or "filesystem_no_roots" in t for t in tokens) + if needs_network: + logger.info( + f"[SrtManager] Not srt-wrapping network-dependent MCP launcher " + f"('{command}' …); it keeps its app-layer permission enforcement. 
" + "Install the server as a global binary for OS-level sandboxing.", + ) + return config + + extra_writable = [] + if self.snapshot_storage: + extra_writable.append(self.snapshot_storage) + if self.agent_temporary_workspace_parent: + extra_writable.append(self.agent_temporary_workspace_parent) + self.srt_manager.fs_tools_extra_writable = [Path(p).resolve() for p in extra_writable] + + fs_settings = self.srt_manager.write_settings_file(profile="fs_tools", agent_id=self.agent_id) + inner_cmdline = shlex.join([str(config["command"]), *[str(a) for a in config["args"]]]) + config["command"] = self.srt_manager.srt_path + config["args"] = ["--settings", str(fs_settings), "sh", "-c", inner_cmdline] return config def get_command_line_mcp_config(self) -> dict[str, Any]: @@ -1705,6 +1791,12 @@ def get_command_line_mcp_config(self) -> dict[str, Any]: if self.command_line_execution_mode == "docker" and self.command_line_docker_enable_sudo: config["args"].append("--enable-sudo") + # SRT mode: generate the per-agent settings file (derived from the SAME + # PathPermissionManager policy as the app layer) and point the server at it. + if self.command_line_execution_mode == "srt" and self.srt_manager: + settings_path = self.srt_manager.write_settings_file(profile="execution", agent_id=self.agent_id) + config["args"].extend(["--srt-settings", str(settings_path)]) + # Add command filters if specified if self.command_line_allowed_commands: config["args"].extend(["--allowed-commands"] + self.command_line_allowed_commands) diff --git a/massgen/filesystem_manager/_srt_manager.py b/massgen/filesystem_manager/_srt_manager.py new file mode 100644 index 000000000..86ebec2d9 --- /dev/null +++ b/massgen/filesystem_manager/_srt_manager.py @@ -0,0 +1,214 @@ +"""SRT (Anthropic sandbox-runtime) manager for MassGen. + +OS-level command/code execution sandboxing using Anthropic's +`@anthropic-ai/sandbox-runtime` (CLI `srt`): bubblewrap on Linux, Seatbelt +(`sandbox-exec`) on macOS. 
SRT has no Python API, so integration is by +**command wrapping**: ``srt --settings ``. + +Design notes (see plan + memory): + - **Defense in depth, not either/or.** SRT settings are *derived from the same* + `PathPermissionManager` policy as the application-level permission layer, so + the two layers can't drift. The OS layer backstops the app layer (shell + escapes, MCP-server bugs, prompt-injected file ops). + - **Sandbox the executor, not the orchestrator.** We only ever wrap MassGen's + own execution surface — the command-execution MCP and the fs-tools MCP server + — never MassGen itself. Backends with their OWN execution sandbox (codex's + `--full-auto` Landlock/Seatbelt, claude_code) use that instead of SRT. + - **Network deny-all by default.** An allowlisted domain is a capability grant + (allowlist-only egress can leak via embedded API keys), so the allowlist is + strictly opt-in. + +This module is import-safe without `srt` installed; the binary is only required +at actual execution time (`verify_available()` / runtime). +""" + +from __future__ import annotations + +import json +import platform +import shutil +from pathlib import Path +from typing import TYPE_CHECKING, Any + +from ..logger_config import logger +from ._base import Permission + +if TYPE_CHECKING: + from ._path_permission_manager import PathPermissionManager + +# Default binary name (overridable for tests / custom installs). +DEFAULT_SRT_BINARY = "srt" + +# Credential/secret locations denied for READS by default. SRT reads are otherwise +# allow-all (empty denyRead = full read access), so without this a sandboxed command +# could `cat ~/.ssh/id_rsa` and exfiltrate secrets — the sandbox would only constrain +# writes/network. We deny the well-known secret stores (NOT all of $HOME, so commands +# can still read ~/.cache, ~/.local, system libs, etc. and keep working). Users extend +# via command_line_srt_deny_read. 
+_DEFAULT_DENY_READ_HOME_RELATIVE = ( + ".ssh", + ".aws", + ".gnupg", + ".netrc", + ".npmrc", + ".pypirc", + ".docker/config.json", + ".git-credentials", + ".kube", + ".azure", + ".config/gcloud", + ".config/gh", +) +_DEFAULT_DENY_READ_ABSOLUTE = ("/etc/shadow",) + +# Profiles ------------------------------------------------------------------- +# "execution": tight — reflects exactly what the AGENT may write. +# "fs_tools": widened — the fs-tools MCP server also writes temp + snapshot +# storage on the framework's behalf (e.g. snapshots), which the +# agent itself sees as read-only. +EXECUTION_PROFILE = "execution" +FS_TOOLS_PROFILE = "fs_tools" + + +# --------------------------------------------------------------------------- # +# Pure wrapping helpers — SINGLE SOURCE OF TRUTH. +# Imported by the code-execution MCP server subprocess too, so the wrapping is +# identical everywhere. +# --------------------------------------------------------------------------- # +def wrap_command_with_srt(command: str, settings_path: str | Path, srt_path: str = DEFAULT_SRT_BINARY) -> str: + """Wrap a shell command string so it runs under SRT. + + Returns a string suitable for ``subprocess.run(..., shell=True)``. The + original command is passed as a single quoted argument to ``sh -c`` so that + shell features (pipes, redirection) execute *inside* the sandbox rather than + in the outer, unsandboxed shell. + """ + import shlex + + return f"{srt_path} --settings {shlex.quote(str(settings_path))} sh -c {shlex.quote(command)}" + + +def wrap_argv_with_srt(argv: list[str], settings_path: str | Path, srt_path: str = DEFAULT_SRT_BINARY) -> list[str]: + """Wrap an argv list (e.g. 
``["codex", "exec", ...]``) so it runs under SRT.""" + return [srt_path, "--settings", str(settings_path), *argv] + + +def srt_available(srt_path: str = DEFAULT_SRT_BINARY) -> bool: + """True if the `srt` binary is discoverable on PATH.""" + return shutil.which(srt_path) is not None + + +class SrtManager: + """Builds per-agent SRT settings and wraps commands. + + Mirrors the lightweight, contract-style shape of ``DockerManager`` (no shared + base class today). Holds a reference to the agent's ``PathPermissionManager`` + and reads ``managed_paths`` *lazily* at settings-build time, because paths are + added progressively during ``FilesystemManager`` setup. + """ + + def __init__( + self, + path_permission_manager: PathPermissionManager, + *, + network_allowed_domains: list[str] | None = None, + extra_deny_read: list[str] | None = None, + allow_unix_sockets: list[str] | None = None, + fs_tools_extra_writable: list[str | Path] | None = None, + settings_dir: str | Path | None = None, + srt_path: str = DEFAULT_SRT_BINARY, + ) -> None: + self.path_permission_manager = path_permission_manager + self.network_allowed_domains = list(network_allowed_domains or []) + self.extra_deny_read = list(extra_deny_read or []) + self.allow_unix_sockets = list(allow_unix_sockets or []) + self.fs_tools_extra_writable = [Path(p).resolve() for p in (fs_tools_extra_writable or [])] + self.settings_dir = Path(settings_dir) if settings_dir else None + self.srt_path = srt_path + + # ------------------------------------------------------------------ # + # Settings derivation + # ------------------------------------------------------------------ # + def build_settings(self, profile: str = EXECUTION_PROFILE) -> dict[str, Any]: + """Derive an SRT settings dict from the agent's path permissions. + + Reads are default-allowed by SRT; we only add protected paths to + ``denyRead``. Writes are default-denied; ``allowWrite`` is allow-only. 
+ """ + managed = list(self.path_permission_manager.managed_paths) + + # Determine the set of writable paths for this profile. + writable: list[Path] = [mp.path for mp in managed if mp.permission == Permission.WRITE] + if profile == FS_TOOLS_PROFILE: + # The fs-tools server also writes temp workspaces (read-only to the + # agent) and the framework's snapshot storage. + writable += [mp.path for mp in managed if mp.path_type == "temp_workspace"] + writable += self.fs_tools_extra_writable + + writable_set = {str(p) for p in writable} + + # Per-context protected paths are immune from modification AND reading, even + # when they live inside a writable context dir. + protected_paths = [str(p) for mp in managed for p in (mp.protected_paths or [])] + + # Explicitly deny-write the read-only paths (belt-and-suspenders on top of + # SRT's allow-only write model), excluding anything we just made writable, + # plus the protected paths (immune even within a writable context). + deny_write = [str(mp.path) for mp in managed if mp.permission == Permission.READ and str(mp.path) not in writable_set] + deny_write += protected_paths + + # Reads: SRT defaults to allow-all, so deny the well-known secret stores + # (else a sandboxed `cat ~/.ssh/id_rsa` exfiltrates), plus per-context + # protected paths and any user-configured extras. 
+ home = Path.home() + deny_read: list[str] = [str(home / rel) for rel in _DEFAULT_DENY_READ_HOME_RELATIVE] + deny_read += list(_DEFAULT_DENY_READ_ABSOLUTE) + deny_read += protected_paths + deny_read += list(self.extra_deny_read) + + return { + "filesystem": { + "allowWrite": sorted(writable_set), + "denyWrite": sorted(set(deny_write)), + "denyRead": sorted(set(deny_read)), + }, + "network": { + "allowedDomains": list(self.network_allowed_domains), + "deniedDomains": [], + "allowUnixSockets": list(self.allow_unix_sockets), + }, + } + + def write_settings_file(self, profile: str = EXECUTION_PROFILE, agent_id: str | None = None) -> Path: + """Write the settings for ``profile`` to a JSON file and return its path.""" + target_dir = self.settings_dir or Path.cwd() + target_dir.mkdir(parents=True, exist_ok=True) + suffix = f"{agent_id}-" if agent_id else "" + path = target_dir / f"srt-settings-{suffix}{profile}.json" + path.write_text(json.dumps(self.build_settings(profile=profile), indent=2)) + logger.info(f"[SrtManager] Wrote {profile} settings to {path}") + return path + + # ------------------------------------------------------------------ # + # Wrapping (instance convenience; delegate to the pure helpers) + # ------------------------------------------------------------------ # + def wrap_command(self, command: str, settings_path: str | Path) -> str: + return wrap_command_with_srt(command, settings_path, srt_path=self.srt_path) + + def wrap_argv(self, argv: list[str], settings_path: str | Path) -> list[str]: + return wrap_argv_with_srt(argv, settings_path, srt_path=self.srt_path) + + # ------------------------------------------------------------------ # + # Availability / platform guards + # ------------------------------------------------------------------ # + def verify_available(self) -> None: + """Raise an actionable error if SRT can't run here.""" + system = platform.system() + if system == "Windows": + raise RuntimeError( + "SRT sandboxing 
(command_line_execution_mode: srt) is not supported on Windows. " "Use 'docker' mode or run under Linux/macOS.", + ) + if shutil.which(self.srt_path) is None: + raise RuntimeError( + "SRT sandboxing requires the 'srt' CLI (Anthropic sandbox-runtime), which was not found on PATH. " "Install it with: npm install -g @anthropic-ai/sandbox-runtime", + ) diff --git a/massgen/mcp_tools/security.py b/massgen/mcp_tools/security.py index b6dc92839..c7fa2cad9 100644 --- a/massgen/mcp_tools/security.py +++ b/massgen/mcp_tools/security.py @@ -154,6 +154,10 @@ def _get_default_allowed_executables(level: str) -> set[str]: "rust", "cargo", "fastmcp", + # SRT (Anthropic sandbox-runtime) — trusted MassGen-controlled wrapper used + # only when command_line_execution_mode: srt is opted in. It prefixes a + # legitimate server command (e.g. `srt --settings fastmcp run ...`). + "srt", # MCP servers (when globally installed) "mcp-server-filesystem", # System utilities (limited set) diff --git a/massgen/subagent/manager.py b/massgen/subagent/manager.py index 430d88368..fefefcbf8 100644 --- a/massgen/subagent/manager.py +++ b/massgen/subagent/manager.py @@ -1918,6 +1918,23 @@ def _generate_subagent_yaml_config( elif setting in fallback_backend: backend_config[setting] = fallback_backend[setting] + # Inherit SRT settings if using srt mode (parity with Docker). Without + # this, a subagent inherits srt MODE but not the parent's network + # allowlist / read-denies, so commands that worked in the parent (e.g. + # an install reaching an allowlisted domain) would fail under the child's + # deny-all default, and parent-protected reads would become readable. 
+ if backend_config.get("command_line_execution_mode") == "srt": + srt_settings = [ + "command_line_srt_network_allowed_domains", + "command_line_srt_deny_read", + "command_line_srt_allow_unix_sockets", + ] + for setting in srt_settings: + if setting in source_backend: + backend_config[setting] = source_backend[setting] + elif setting in fallback_backend: + backend_config[setting] = fallback_backend[setting] + # Inherit code-based tools settings code_tools_settings = [ "enable_code_based_tools", diff --git a/massgen/tests/test_srt_backend_degrade.py b/massgen/tests/test_srt_backend_degrade.py new file mode 100644 index 000000000..15f667d81 --- /dev/null +++ b/massgen/tests/test_srt_backend_degrade.py @@ -0,0 +1,64 @@ +"""Native-sandbox backends must degrade `command_line_execution_mode: srt` to local. + +Codex/claude_code self-sandbox (codex `--full-auto` = Landlock/Seatbelt). Wrapping +their MCP command execution in `srt` (another Seatbelt) nests sandboxes and hangs — +proven by a live smoke test where codex+srt timed out but codex+local succeeded. +SRT is only for backends WITHOUT a native execution sandbox. 
+""" + +from massgen.backend.base import FilesystemSupport, LLMBackend + + +class _StubBackend(LLMBackend): + """Minimal concrete backend (MCP filesystem support) for testing the degrade.""" + + _native_sandbox = False + + async def stream_with_tools(self, messages, tools, **kwargs): # pragma: no cover + if False: + yield + + def get_provider_name(self) -> str: + return "stub" + + def get_filesystem_support(self) -> FilesystemSupport: + return FilesystemSupport.MCP + + def has_native_execution_sandbox(self) -> bool: + return self._native_sandbox + + +def test_api_backend_without_native_sandbox_keeps_srt(tmp_path): + b = _StubBackend(cwd=str(tmp_path / "ws"), enable_mcp_command_line=True, command_line_execution_mode="srt") + assert b.filesystem_manager.command_line_execution_mode == "srt" + + +def test_native_sandbox_backend_degrades_srt_to_local(tmp_path): + class _NativeStub(_StubBackend): + _native_sandbox = True + + b = _NativeStub(cwd=str(tmp_path / "ws2"), enable_mcp_command_line=True, command_line_execution_mode="srt") + assert b.filesystem_manager.command_line_execution_mode == "local" + + +def test_degrade_does_not_touch_docker_or_local(tmp_path): + class _NativeStub(_StubBackend): + _native_sandbox = True + + b = _NativeStub(cwd=str(tmp_path / "ws3"), enable_mcp_command_line=True, command_line_execution_mode="local") + assert b.filesystem_manager.command_line_execution_mode == "local" + + +def test_base_default_has_no_native_execution_sandbox(tmp_path): + b = _StubBackend(cwd=str(tmp_path / "ws4"), enable_mcp_command_line=True, command_line_execution_mode="local") + assert b.has_native_execution_sandbox() is False + + +def test_codex_and_claude_code_declare_native_sandbox(): + from massgen.backend.claude_code import ClaudeCodeBackend + from massgen.backend.codex import CodexBackend + + # Method returns a constant; call it on an uninitialized instance to avoid + # heavy backend construction (CLI/auth/etc). 
+ assert CodexBackend.has_native_execution_sandbox(object.__new__(CodexBackend)) is True + assert ClaudeCodeBackend.has_native_execution_sandbox(object.__new__(ClaudeCodeBackend)) is True diff --git a/massgen/tests/test_srt_filesystem_integration.py b/massgen/tests/test_srt_filesystem_integration.py new file mode 100644 index 000000000..f6f9cc2cf --- /dev/null +++ b/massgen/tests/test_srt_filesystem_integration.py @@ -0,0 +1,158 @@ +"""FilesystemManager ↔ SRT wiring tests (offline; no `srt` binary required). + +Covers the config-building side of SRT mode: + - command-line MCP server gets `--execution-mode srt --srt-settings ` + and a valid settings file is generated from the path policy + - the filesystem-tools MCP server is ALSO srt-wrapped (defense in depth) + - default (local) mode is completely unchanged +""" + +import json +from pathlib import Path + +import pytest + +from massgen.filesystem_manager._filesystem_manager import FilesystemManager + + +@pytest.fixture +def srt_fs_manager(tmp_path): + return FilesystemManager( + cwd=str(tmp_path / "workspace"), + enable_mcp_command_line=True, + command_line_execution_mode="srt", + command_line_srt_network_allowed_domains=["api.anthropic.com"], + ) + + +@pytest.fixture +def local_fs_manager(tmp_path): + return FilesystemManager( + cwd=str(tmp_path / "workspace"), + enable_mcp_command_line=True, + command_line_execution_mode="local", + ) + + +# --------------------------------------------------------------------------- # +# command-line MCP server +# --------------------------------------------------------------------------- # +def test_command_line_config_has_srt_args(srt_fs_manager): + config = srt_fs_manager.get_command_line_mcp_config() + args = config["args"] + assert "--execution-mode" in args + assert args[args.index("--execution-mode") + 1] == "srt" + assert "--srt-settings" in args + + +def test_command_line_config_writes_valid_settings_file(srt_fs_manager): + config = 
srt_fs_manager.get_command_line_mcp_config() + settings_path = Path(config["args"][config["args"].index("--srt-settings") + 1]) + assert settings_path.exists() + data = json.loads(settings_path.read_text()) + # Workspace is writable; the opt-in network allowlist is honored. + assert str(srt_fs_manager.cwd) in data["filesystem"]["allowWrite"] + assert data["network"]["allowedDomains"] == ["api.anthropic.com"] + + +def test_local_mode_has_no_srt_args(local_fs_manager): + config = local_fs_manager.get_command_line_mcp_config() + assert "--srt-settings" not in config["args"] + assert config["args"][config["args"].index("--execution-mode") + 1] == "local" + + +# --------------------------------------------------------------------------- # +# filesystem-tools MCP server (defense in depth) — srt-wrapped via the `sh -c` +# form so srt cannot eat the server's `--` separator (which broke the handshake +# in the direct-argv form). Must keep the workspace writable + pass MCP security. +# --------------------------------------------------------------------------- # +def test_fs_tools_server_is_srt_wrapped_via_sh_c(srt_fs_manager): + config = srt_fs_manager.get_workspace_tools_mcp_config() + assert config["command"] == "srt" + assert config["args"][0] == "--settings" + fs_settings = Path(config["args"][1]) + assert fs_settings.exists() + # CRITICAL: sh -c form, with the original fastmcp command line (incl. `--`) + # preserved inside the shell string. 
+ assert config["args"][2] == "sh" + assert config["args"][3] == "-c" + inner = config["args"][4] + assert inner.startswith("fastmcp run ") + assert " -- --allowed-paths " in inner # the `--` separator survives + data = json.loads(fs_settings.read_text()) + assert str(srt_fs_manager.cwd) in data["filesystem"]["allowWrite"] + + +def test_fs_tools_server_not_wrapped_in_local_mode(local_fs_manager): + config = local_fs_manager.get_workspace_tools_mcp_config() + assert config["command"] == "fastmcp" + assert config["args"][0] == "run" + + +# --------------------------------------------------------------------------- # +# MCP security allowlist — `srt` must be an accepted MCP server executable, and +# the wrapped config must pass full security validation. +# Regression origin: a live smoke test, not the dict-shape tests above. +# --------------------------------------------------------------------------- # +def test_srt_is_an_allowed_mcp_executable(): + from massgen.mcp_tools.security import _get_default_allowed_executables + + for level in ("strict", "moderate", "permissive"): + assert "srt" in _get_default_allowed_executables(level) + + +def test_srt_wrapped_fs_tools_config_passes_mcp_security(srt_fs_manager): + from massgen.mcp_tools.security import validate_server_security + + config = srt_fs_manager.get_workspace_tools_mcp_config() + assert config["command"] == "srt" + validate_server_security(config) # must NOT raise + + +# --------------------------------------------------------------------------- # +# npx/npm launchers can't be srt-wrapped (registry + ~/.npm cache writes are +# blocked by the sandbox → E403/EPERM). They must be SKIPPED (keep app-layer +# protection), while non-network launchers (python3/fastmcp/global binary) wrap. 
+# --------------------------------------------------------------------------- # +def test_wrap_skips_npx_launcher(srt_fs_manager): + npx_cfg = { + "name": "filesystem", + "type": "stdio", + "command": "npx", + "args": ["-y", "@modelcontextprotocol/server-filesystem", "/ws"], + } + out = srt_fs_manager._wrap_stdio_config_with_srt(dict(npx_cfg)) + assert out["command"] == "npx" # unchanged — not wrapped + + +def test_wrap_applies_to_non_network_launcher(srt_fs_manager): + # A plain python3/fastmcp launcher with no npx dependency IS wrapped. + py_cfg = { + "name": "filesystem", + "type": "stdio", + "command": "python3", + "args": ["/path/my_server.py", "/ws"], + } + out = srt_fs_manager._wrap_stdio_config_with_srt(dict(py_cfg)) + assert out["command"] == "srt" + assert out["args"][2:4] == ["sh", "-c"] + assert out["args"][4].startswith("python3 ") + + +def test_wrap_skips_no_roots_wrapper(srt_fs_manager): + # The no-roots wrapper runs as python3 but INTERNALLY spawns npx → must be skipped. + cfg = { + "name": "filesystem", + "type": "stdio", + "command": "python3", + "args": ["/abs/massgen/mcp_tools/filesystem_no_roots.py", "/ws"], + } + out = srt_fs_manager._wrap_stdio_config_with_srt(dict(cfg)) + assert out["command"] == "python3" # unchanged — not wrapped + + +def test_wrap_skips_absolute_path_npx(srt_fs_manager): + # A token-based skip must catch an absolute-path npx (not just bare "npx"). + cfg = {"name": "filesystem", "type": "stdio", "command": "/usr/local/bin/npx", "args": ["server"]} + out = srt_fs_manager._wrap_stdio_config_with_srt(dict(cfg)) + assert out["command"] == "/usr/local/bin/npx" # unchanged — not wrapped diff --git a/massgen/tests/test_srt_manager.py b/massgen/tests/test_srt_manager.py new file mode 100644 index 000000000..ed87bb65c --- /dev/null +++ b/massgen/tests/test_srt_manager.py @@ -0,0 +1,206 @@ +"""Unit tests for SrtManager — OS-level SRT (sandbox-runtime) sandboxing. 
+ +These tests are pure/offline: they never require the `srt` binary or bubblewrap. +They cover: + - settings derivation from PathPermissionManager (defense-in-depth: SRT settings + derive from the SAME path policy as the app-level permission layer) + - the two profiles ("execution" tight; "fs_tools" widened for snapshots) + - network deny-all-by-default with opt-in allowlist (capability grant) + - command/argv wrapping (single source of truth shared with the MCP server) + - availability/platform guards +""" + +import json + +import pytest + +from massgen.filesystem_manager._base import Permission +from massgen.filesystem_manager._path_permission_manager import PathPermissionManager +from massgen.filesystem_manager._srt_manager import ( + SrtManager, + srt_available, + wrap_argv_with_srt, + wrap_command_with_srt, +) + + +@pytest.fixture +def pm_with_paths(tmp_path): + """A PathPermissionManager populated like a real agent setup.""" + workspace = tmp_path / "workspace" + temp_ws = tmp_path / "temp_ws" + ctx_write = tmp_path / "ctx_write" + ctx_read = tmp_path / "ctx_read" + protected = ctx_write / "secrets" + for d in (workspace, temp_ws, ctx_write, ctx_read, protected): + d.mkdir(parents=True, exist_ok=True) + + pm = PathPermissionManager(context_write_access_enabled=True) + pm.add_path(workspace, Permission.WRITE, "workspace") + pm.add_path(temp_ws, Permission.READ, "temp_workspace") + pm.add_context_paths( + [ + {"path": str(ctx_write), "permission": "write", "protected_paths": ["secrets"]}, + {"path": str(ctx_read), "permission": "read"}, + ], + ) + return { + "pm": pm, + "workspace": workspace.resolve(), + "temp_ws": temp_ws.resolve(), + "ctx_write": ctx_write.resolve(), + "ctx_read": ctx_read.resolve(), + "protected": protected.resolve(), + } + + +# --------------------------------------------------------------------------- # +# build_settings — execution profile +# --------------------------------------------------------------------------- # +def 
test_execution_profile_workspace_and_write_context_are_writable(pm_with_paths): + mgr = SrtManager(pm_with_paths["pm"]) + settings = mgr.build_settings(profile="execution") + allow_write = settings["filesystem"]["allowWrite"] + assert str(pm_with_paths["workspace"]) in allow_write + assert str(pm_with_paths["ctx_write"]) in allow_write + + +def test_execution_profile_temp_and_read_context_are_not_writable(pm_with_paths): + mgr = SrtManager(pm_with_paths["pm"]) + settings = mgr.build_settings(profile="execution") + allow_write = settings["filesystem"]["allowWrite"] + deny_write = settings["filesystem"]["denyWrite"] + # Temp workspace is read-only for the agent during coordination. + assert str(pm_with_paths["temp_ws"]) not in allow_write + assert str(pm_with_paths["temp_ws"]) in deny_write + assert str(pm_with_paths["ctx_read"]) in deny_write + + +def test_protected_paths_are_deny_read(pm_with_paths): + mgr = SrtManager(pm_with_paths["pm"], extra_deny_read=["/some/extra/secret"]) + settings = mgr.build_settings(profile="execution") + deny_read = settings["filesystem"]["denyRead"] + assert str(pm_with_paths["protected"]) in deny_read + assert "/some/extra/secret" in deny_read + + +def test_protected_paths_are_also_deny_write(pm_with_paths): + # Protected paths are immune from modification even inside a writable context. + mgr = SrtManager(pm_with_paths["pm"]) + settings = mgr.build_settings(profile="execution") + assert str(pm_with_paths["protected"]) in settings["filesystem"]["denyWrite"] + + +def test_secret_stores_are_deny_read_by_default(pm_with_paths): + # SRT reads are allow-all by default; the manager must deny known secret stores + # so a sandboxed `cat ~/.ssh/id_rsa` is blocked. 
+ from pathlib import Path + + mgr = SrtManager(pm_with_paths["pm"]) + deny_read = mgr.build_settings(profile="execution")["filesystem"]["denyRead"] + home = Path.home() + for rel in (".ssh", ".aws", ".gnupg", ".config/gcloud"): + assert str(home / rel) in deny_read, f"{rel} should be read-denied by default" + assert "/etc/shadow" in deny_read + + +# --------------------------------------------------------------------------- # +# build_settings — fs_tools profile (defense in depth, must allow snapshots) +# --------------------------------------------------------------------------- # +def test_fs_tools_profile_widens_writes_for_temp_and_snapshot(pm_with_paths, tmp_path): + snapshot = tmp_path / "snapshot_storage" + snapshot.mkdir() + mgr = SrtManager(pm_with_paths["pm"], fs_tools_extra_writable=[snapshot]) + settings = mgr.build_settings(profile="fs_tools") + allow_write = settings["filesystem"]["allowWrite"] + # The fs-tools SERVER must be able to write workspace + temp + snapshot_storage, + # even though the AGENT sees temp as read-only. 
+ assert str(pm_with_paths["workspace"]) in allow_write + assert str(pm_with_paths["temp_ws"]) in allow_write + assert str(snapshot.resolve()) in allow_write + + +# --------------------------------------------------------------------------- # +# network — deny-all by default, opt-in allowlist +# --------------------------------------------------------------------------- # +def test_network_default_deny_all(pm_with_paths): + mgr = SrtManager(pm_with_paths["pm"]) + settings = mgr.build_settings(profile="execution") + assert settings["network"]["allowedDomains"] == [] + + +def test_network_allowlist_is_opt_in(pm_with_paths): + mgr = SrtManager(pm_with_paths["pm"], network_allowed_domains=["api.anthropic.com"]) + settings = mgr.build_settings(profile="execution") + assert settings["network"]["allowedDomains"] == ["api.anthropic.com"] + + +def test_allow_unix_sockets_passthrough(pm_with_paths): + mgr = SrtManager(pm_with_paths["pm"], allow_unix_sockets=["/var/run/docker.sock"]) + settings = mgr.build_settings(profile="execution") + assert settings["network"]["allowUnixSockets"] == ["/var/run/docker.sock"] + + +# --------------------------------------------------------------------------- # +# write_settings_file +# --------------------------------------------------------------------------- # +def test_write_settings_file_produces_valid_json(pm_with_paths, tmp_path): + mgr = SrtManager(pm_with_paths["pm"], settings_dir=tmp_path / "srt") + path = mgr.write_settings_file(profile="execution", agent_id="agent_a") + assert path.exists() + data = json.loads(path.read_text()) + assert "filesystem" in data and "network" in data + assert str(pm_with_paths["workspace"]) in data["filesystem"]["allowWrite"] + + +# --------------------------------------------------------------------------- # +# wrapping — single source of truth shared with the MCP server +# --------------------------------------------------------------------------- # +def test_wrap_command_string_form(): + assert 
wrap_command_with_srt("echo hi", "/tmp/cfg.json") == "srt --settings /tmp/cfg.json sh -c 'echo hi'" + + +def test_wrap_command_quotes_shell_metacharacters(): + wrapped = wrap_command_with_srt("echo hi | grep h", "/tmp/cfg.json") + # The original command must be passed as a single quoted argument to `sh -c`, + # so the pipe runs INSIDE the sandbox, not in the outer (unsandboxed) shell. + assert wrapped == "srt --settings /tmp/cfg.json sh -c 'echo hi | grep h'" + + +def test_wrap_argv_list_form(): + assert wrap_argv_with_srt(["codex", "exec", "--json"], "/tmp/cfg.json") == [ + "srt", + "--settings", + "/tmp/cfg.json", + "codex", + "exec", + "--json", + ] + + +def test_custom_srt_binary_path(): + assert wrap_argv_with_srt(["x"], "/c.json", srt_path="/opt/srt")[0] == "/opt/srt" + + +# --------------------------------------------------------------------------- # +# availability / platform guards +# --------------------------------------------------------------------------- # +def test_srt_available_false_when_missing(monkeypatch): + monkeypatch.setattr("massgen.filesystem_manager._srt_manager.shutil.which", lambda _: None) + assert srt_available() is False + + +def test_verify_available_raises_actionable_error_when_missing(monkeypatch, pm_with_paths): + monkeypatch.setattr("massgen.filesystem_manager._srt_manager.platform.system", lambda: "Darwin") + monkeypatch.setattr("massgen.filesystem_manager._srt_manager.shutil.which", lambda _: None) + mgr = SrtManager(pm_with_paths["pm"]) + with pytest.raises(RuntimeError, match="sandbox-runtime"): + mgr.verify_available() + + +def test_verify_available_raises_on_windows(monkeypatch, pm_with_paths): + monkeypatch.setattr("massgen.filesystem_manager._srt_manager.platform.system", lambda: "Windows") + monkeypatch.setattr("massgen.filesystem_manager._srt_manager.shutil.which", lambda _: "C:/srt.exe") + mgr = SrtManager(pm_with_paths["pm"]) + with pytest.raises(RuntimeError, match="(?i)windows"): + mgr.verify_available() diff --git 
a/massgen/tests/test_subagent_manager.py b/massgen/tests/test_subagent_manager.py index d206742b5..a38cfbced 100644 --- a/massgen/tests/test_subagent_manager.py +++ b/massgen/tests/test_subagent_manager.py @@ -3893,3 +3893,36 @@ def test_returns_copy_not_reference(self): env = SubagentManager._clean_subprocess_env() env["__TEST_SENTINEL__"] = "yes" assert os.environ.get("__TEST_SENTINEL__") is None + + +class TestSrtSettingsInheritance: + """SRT settings must propagate from parent to subagent (parity with Docker).""" + + def test_srt_settings_inherited_by_subagent(self, tmp_path): + from pathlib import Path + + from massgen.subagent.manager import SubagentManager + from massgen.subagent.models import SubagentConfig + + parent_backend = { + "type": "openai", + "model": "gpt-5", + "cwd": "ws", + "enable_mcp_command_line": True, + "command_line_execution_mode": "srt", + "command_line_srt_network_allowed_domains": ["pypi.org"], + "command_line_srt_deny_read": ["/x/.secret"], + } + parent_cfg = {"id": "parent", "backend": parent_backend} + mgr = SubagentManager( + parent_workspace=str(tmp_path), + parent_agent_id="parent", + orchestrator_id="orch", + parent_agent_configs=[parent_cfg], + ) + cfg = SubagentConfig.create(task="do x", parent_agent_id="parent") + out = mgr._generate_subagent_yaml_config(cfg, Path(tmp_path) / "sub") + child_backend = out["agents"][0]["backend"] + assert child_backend["command_line_execution_mode"] == "srt" + assert child_backend["command_line_srt_network_allowed_domains"] == ["pypi.org"] + assert child_backend["command_line_srt_deny_read"] == ["/x/.secret"] From 83f9a872c52fdb03c8448cadbc6b4eb846d153dc Mon Sep 17 00:00:00 2001 From: ncrispino Date: Tue, 9 Jun 2026 21:29:46 -0700 Subject: [PATCH 2/7] feat(security): harden PathPermissionManager hook against file-tool sandbox escapes Adds a key-agnostic escape scan (_validate_no_path_arg_escapes) that walks the full tool-args tree (nested dicts + lists) and denies any value resolving outside 
all managed areas. Closes fail-open gaps surfaced by an adversarial audit: - path under an unrecognized arg key (e.g. output_path/dst) bypassed the boundary - list-valued and nested-dict path args were never checked - move/copy 'source' pointing outside (delete-external / exfiltrate-external) No false positives: non-path strings resolve harmlessly inside the workspace, and content-bearing keys are skipped. Symlinks/.. were already handled via .resolve(). 15-vector adversarial test suite added. Co-Authored-By: Claude Opus 4.8 (1M context) --- PR_DRAFT_sandboxing.md | 50 +++++++ .../_path_permission_manager.py | 105 +++++++++++++ .../test_path_permission_hook_adversarial.py | 140 ++++++++++++++++++ 3 files changed, 295 insertions(+) create mode 100644 PR_DRAFT_sandboxing.md create mode 100644 massgen/tests/test_path_permission_hook_adversarial.py diff --git a/PR_DRAFT_sandboxing.md b/PR_DRAFT_sandboxing.md new file mode 100644 index 000000000..a63156d95 --- /dev/null +++ b/PR_DRAFT_sandboxing.md @@ -0,0 +1,50 @@ +# PR Draft: OS-Level Agent Sandboxing (SRT) + Permission-Hook Hardening + +**Branch:** `feat/better-sandboxing` + +## Summary + +Adds **OS-level execution sandboxing** for agents via Anthropic's [sandbox-runtime](https://github.com/anthropic-experimental/sandbox-runtime) (`srt`: bubblewrap/Linux, Seatbelt/macOS), and **hardens the existing application-layer permission hook** against file-tool sandbox escapes. Default-off, one-knob opt-in; current behavior is unchanged unless a config sets `command_line_execution_mode: srt`. + +Defense in depth, by design: the OS layer (SRT) and the app layer (`PathPermissionManager`) are derived from the **same** path policy and both stay active. SRT closes the shell escape hatch (e.g. `echo x > /etc/passwd`); the hardened hook closes file-tool escapes (`write_file`/`move`/`copy` to/from outside the workspace). + +## What's included + +### 1. 
SRT sandbox mode (`command_line_execution_mode: srt`) +- **`SrtManager`** (`massgen/filesystem_manager/_srt_manager.py`) — derives per-agent SRT settings from `PathPermissionManager.managed_paths`: `allowWrite` = writable paths, `denyWrite`/`denyRead` for read-only/protected paths, **network deny-all by default** (allowlist is opt-in, documented as a capability grant), and a **built-in read-deny baseline for secret stores** (`~/.ssh`, `~/.aws`, `~/.gnupg`, cloud creds, `/etc/shadow`, …) since SRT reads are otherwise allow-all. +- **Command-line MCP** wraps each executed command: `srt --settings cfg sh -c '<cmd>'`. +- **Filesystem-tools MCP servers** are OS-wrapped too (defense in depth), via the **`sh -c` form** — required because `srt` otherwise consumes the server's `--` separator. **npx/npm launchers (and the no-roots wrapper that spawns npx) auto-skip** wrapping (they need the registry + `~/.npm` writes the sandbox blocks) and keep their app-layer protection. +- **Native-sandbox backends degrade `srt`→`local`**: `has_native_execution_sandbox()` (True for `codex` `--full-auto` and `claude_code`) prevents nested Seatbelt/Landlock hangs; the stored config is normalized so downstream raw reads see `local`. +- **Subagents inherit** the parent's `command_line_srt_*` settings (parity with Docker). +- New backend params `command_line_srt_network_allowed_domains` / `_deny_read` / `_allow_unix_sockets` added to the single-source exclusion list; `srt` added to the MCP executable allowlist. +- Config example: `massgen/configs/tools/filesystem/sandbox/srt_sandbox.yaml`. + +### 2. Permission-hook hardening (`PathPermissionManager`) +- New `_validate_no_path_arg_escapes`: a **key-agnostic scan** that walks the full tool-args tree (nested dicts + lists) and denies any value resolving outside all managed areas. 
Closes the prior **fail-open** behavior (path under an unrecognized key, list-valued path, or move/copy `source` pointing outside) without false positives (non-path strings resolve harmlessly inside the workspace; content keys are skipped). Symlinks/`..` were already handled by `.resolve()`. + +## Tests + +| File | Covers | +|------|--------| +| `test_srt_manager.py` | settings derivation, profiles, secret read-deny baseline, protected-path read+write deny, wrapping, availability guards | +| `test_srt_filesystem_integration.py` | command-line + fs-tools config wiring, `sh -c` wrap, npx / no-roots auto-skip, MCP-security validation | +| `test_srt_backend_degrade.py` | `srt`→`local` degrade for native-sandbox backends; API backends keep `srt` | +| `test_path_permission_hook_adversarial.py` | 15 escape vectors (absolute/`..`/symlink/unrecognized-key/list/nested-dict/move-source/copy-source/read-exfil) + false-positive guards | +| `test_subagent_manager.py::TestSrtSettingsInheritance` | subagent inherits parent srt settings | + +## Live verification (macOS 15.7, srt 1.0.0) +- Standalone srt: allowed-write ✓, out-of-scope write blocked ✓, deny-all network blocked ✓, **secret read blocked** ✓. +- **3 API backends** (openrouter/`chatcompletion`, OpenAI Responses/`openai`, Gemini/`gemini`): workspace write OK; out-of-workspace write → `Operation not permitted`; file-tool escape blocked. +- **codex + srt** and **claude_code + srt**: degrade to local, run via native sandbox, complete. + +## Pre-merge quality gate +A multi-agent code review (correctness/security/parity/tests, adversarially verified) was run on the diff; **all 15 confirmed findings fixed** — most notably a HIGH read-confinement hole (SRT reads were default-allow) and a subagent settings-inheritance parity gap. + +## Known follow-ups (not in this PR) +- `write_file`/`edit_file` (npx filesystem server) is app-layer-only; full OS coverage needs a globally-installed (non-npx) filesystem server. 
+- Network-egress MITM / per-agent credential scoping (allowlist-only egress can leak via embedded API keys). +- claude_code native-sandbox lever via `ClaudeAgentOptions`. + +## Configs used to test +- `massgen/configs/tools/filesystem/sandbox/srt_sandbox.yaml` (committed) +- Throwaway smoke configs (openrouter/openai/gemini/codex/claude_code + srt) under `/tmp/srt_smoke/` (not committed). diff --git a/massgen/filesystem_manager/_path_permission_manager.py b/massgen/filesystem_manager/_path_permission_manager.py index 027cc78f2..0ce65b05f 100644 --- a/massgen/filesystem_manager/_path_permission_manager.py +++ b/massgen/filesystem_manager/_path_permission_manager.py @@ -1119,6 +1119,13 @@ def _validate_file_context_access(self, tool_name: str, tool_args: dict[str, Any Returns: Tuple of (allowed: bool, reason: Optional[str]) """ + # Defense in depth: deny if ANY argument (under any key, incl. lists) + # resolves outside every managed area — catches write/read-capable tools + # whose name doesn't match the write patterns and would otherwise fail-open. + escape_check = self._validate_no_path_arg_escapes(tool_args) + if not escape_check[0]: + return escape_check + # Extract file path from arguments file_path = self._extract_file_path(tool_args) if not file_path: @@ -1152,6 +1159,13 @@ def _validate_file_context_access(self, tool_name: str, tool_args: dict[str, Any def _validate_write_tool(self, tool_name: str, tool_args: dict[str, Any]) -> tuple[bool, str | None]: """Validate write tool access.""" + # Defense in depth: no argument (under any key, incl. lists / `source`) may + # resolve outside allowed directories. Closes the fail-open gap where a path + # under an unrecognized key bypasses the primary extractor. 
+ escape_check = self._validate_no_path_arg_escapes(tool_args) + if not escape_check[0]: + return escape_check + # Special handling for copy_files_batch - validate all destination paths after globbing if tool_name == "copy_files_batch": return self._validate_copy_files_batch(tool_args) @@ -1389,6 +1403,97 @@ def _validate_command_tool(self, tool_name: str, tool_args: dict[str, Any]) -> t return (True, None) + # Argument keys that carry CONTENT (text/data), never filesystem paths. Skipped + # by the key-agnostic escape scan so a path-looking string inside file content + # isn't mistaken for a target path. + _CONTENT_ARG_KEYS = frozenset( + { + "content", + "contents", + "text", + "data", + "body", + "old_string", + "new_string", + "old_str", + "new_str", + "patch", + "diff", + "message", + "prompt", + "query", + "command", + "code", + "snippet", + "description", + "instructions", + # search patterns may legitimately be absolute-path-looking regex/globs + "pattern", + "regex", + "glob", + }, + ) + + def _is_path_within_any_managed_area(self, path: Path) -> bool: + """Like ``_is_path_within_allowed_directories`` but ALSO counts + ``file_context_parent`` dirs as inside. + + Used by the escape scan so it only flags paths that are TRULY outside every + managed area; finer-grained denials (e.g. a sibling file inside a + file-context directory) are left to the downstream permission logic, which + gives a more specific reason. + """ + resolved = path.resolve() + for managed_path in self.managed_paths: + if managed_path.contains(resolved) or managed_path.path == resolved: + return True + return False + + def _validate_no_path_arg_escapes(self, tool_args: dict[str, Any]) -> tuple[bool, str | None]: + """Defense in depth: deny if ANY argument value resolves outside allowed dirs. 
+ + Closes the fail-open gap in the primary extractor: a path under an + unrecognized key, inside a list, or in a move/copy ``source`` (which the + extractor deliberately skips) would otherwise bypass the boundary check. + + Safe against false positives: non-path strings and relative values resolve + harmlessly *inside* the workspace, so only genuine absolute-outside / + ``..``-escape / symlink-escape values are denied. Content-bearing keys are + skipped so file content that merely mentions a path isn't flagged. + + Walks the FULL argument tree (nested dicts and lists), so a path buried under + e.g. ``{"opts": {"path": "/etc/passwd"}}`` or ``{"items": [{"path": ...}]}`` + is caught, not just top-level keys. + """ + + def walk(obj: Any, key: str | None): + if isinstance(obj, str): + yield (key, obj) + elif isinstance(obj, dict): + for k, v in obj.items(): + if k in self._CONTENT_ARG_KEYS: + continue + yield from walk(v, k) + elif isinstance(obj, (list, tuple)): + for item in obj: + yield from walk(item, key) + + for key, cand in walk(tool_args, None): + if not cand: + continue + if "\x00" in cand: + return (False, f"Access denied: argument '{key}' contains a null byte.") + try: + resolved = Path(self._resolve_path_against_workspace(cand)).resolve() + except (OSError, ValueError): + continue + if not self._is_path_within_any_managed_area(resolved): + return ( + False, + f"Access denied: argument '{key}'='{cand}' resolves to '{resolved}', outside allowed directories.", + ) + return (True, None) + def _extract_file_path(self, tool_args: dict[str, Any]) -> str | None: """Extract file path from tool arguments.""" # Common argument names for file paths: diff --git a/massgen/tests/test_path_permission_hook_adversarial.py b/massgen/tests/test_path_permission_hook_adversarial.py new file mode 100644 index 000000000..b7826475d --- /dev/null +++ b/massgen/tests/test_path_permission_hook_adversarial.py @@ -0,0 +1,140 @@ +"""Adversarial audit of 
PathPermissionManager.pre_tool_use_hook — the app-layer +that gates every MCP file tool (and the *only* layer for non-srt-wrapped servers). + +Each test below is an attempted SANDBOX ESCAPE via a file tool; the hook MUST deny +it. Vectors: out-of-workspace absolute paths, `..` traversal, symlink-through, +UNRECOGNIZED path-arg keys (fail-open), list-valued paths, and move/copy `source` +pointing outside (delete-external / exfiltrate-external). +""" + +import os + +import pytest + +from massgen.filesystem_manager._base import Permission +from massgen.filesystem_manager._path_permission_manager import PathPermissionManager + + +@pytest.fixture +def pm(tmp_path): + workspace = tmp_path / "workspace" + outside = tmp_path / "outside" + secret = outside / "secret.txt" + for d in (workspace, outside): + d.mkdir(parents=True, exist_ok=True) + secret.write_text("TOP SECRET") + m = PathPermissionManager(context_write_access_enabled=True) + m.add_path(workspace, Permission.WRITE, "workspace") + return {"m": m, "workspace": workspace.resolve(), "outside": outside.resolve(), "secret": secret.resolve()} + + +async def _denied(m, tool, args): + allowed, _reason = await m.pre_tool_use_hook(tool, args) + return not allowed + + +# --------------------------------------------------------------------------- # +# Baselines that should already hold (resolve() handles these) +# --------------------------------------------------------------------------- # +@pytest.mark.asyncio +async def test_absolute_outside_write_denied(pm): + assert await _denied(pm["m"], "write_file", {"path": str(pm["outside"] / "evil.txt"), "content": "x"}) + + +@pytest.mark.asyncio +async def test_dotdot_traversal_write_denied(pm): + evil = str(pm["workspace"] / ".." 
/ "outside" / "evil.txt") + assert await _denied(pm["m"], "write_file", {"path": evil, "content": "x"}) + + +@pytest.mark.asyncio +async def test_symlink_through_workspace_denied(pm): + link = pm["workspace"] / "link" + os.symlink(str(pm["outside"]), str(link)) + assert await _denied(pm["m"], "write_file", {"path": str(link / "evil.txt"), "content": "x"}) + + +# --------------------------------------------------------------------------- # +# The real gaps (these are expected to FAIL pre-hardening = currently ALLOWED) +# --------------------------------------------------------------------------- # +@pytest.mark.asyncio +async def test_unrecognized_path_key_write_denied(pm): + # path under a key not in the known list → fail-open today. + assert await _denied(pm["m"], "write_file", {"output_path": str(pm["outside"] / "evil.txt"), "content": "x"}) + + +@pytest.mark.asyncio +async def test_arbitrary_key_absolute_path_write_denied(pm): + assert await _denied(pm["m"], "store_blob", {"dst": str(pm["outside"] / "evil.txt"), "content": "x"}) + + +@pytest.mark.asyncio +async def test_list_valued_path_write_denied(pm): + assert await _denied(pm["m"], "write_files", {"paths": [str(pm["outside"] / "evil.txt")], "content": "x"}) + + +@pytest.mark.asyncio +async def test_move_source_outside_denied(pm): + # move deletes the source — a source outside the sandbox must be denied. + assert await _denied(pm["m"], "move_file", {"source": str(pm["secret"]), "destination": str(pm["workspace"] / "x")}) + + +@pytest.mark.asyncio +async def test_copy_source_outside_denied(pm): + # copy reads the source into the workspace — reading an external file is exfiltration. 
+ assert await _denied(pm["m"], "copy_file", {"source_path": str(pm["secret"]), "destination_path": str(pm["workspace"] / "x")}) + + +# --------------------------------------------------------------------------- # +# Must NOT over-block legitimate in-workspace use (guard against false positives) +# --------------------------------------------------------------------------- # +@pytest.mark.asyncio +async def test_in_workspace_write_allowed(pm): + allowed, _ = await pm["m"].pre_tool_use_hook("write_file", {"path": str(pm["workspace"] / "ok.txt"), "content": "hi"}) + assert allowed + + +@pytest.mark.asyncio +async def test_content_with_pathlike_text_not_blocked(pm): + # 'content' holds text that merely looks like a path — must not be treated as a path. + allowed, _ = await pm["m"].pre_tool_use_hook( + "write_file", + {"path": str(pm["workspace"] / "ok.txt"), "content": "see /etc/passwd for details"}, + ) + assert allowed + + +@pytest.mark.asyncio +async def test_content_equal_to_absolute_path_not_blocked(pm): + # The whole content value being an absolute path is still CONTENT (written into a + # workspace file), not a target — must not be denied. 
+ allowed, _ = await pm["m"].pre_tool_use_hook( + "write_file", + {"path": str(pm["workspace"] / "cfg"), "content": str(pm["secret"])}, + ) + assert allowed + + +# --------------------------------------------------------------------------- # +# Deeper vectors closed by the review-driven hardening +# --------------------------------------------------------------------------- # +@pytest.mark.asyncio +async def test_nested_dict_path_escape_denied(pm): + assert await _denied(pm["m"], "write_file", {"opts": {"path": str(pm["outside"] / "evil.txt")}, "content": "x"}) + + +@pytest.mark.asyncio +async def test_dict_in_list_path_escape_denied(pm): + assert await _denied(pm["m"], "write_files", {"items": [{"target": str(pm["outside"] / "evil.txt")}]}) + + +@pytest.mark.asyncio +async def test_value_key_escape_denied(pm): + # 'value' is no longer treated as a content key → a path under it is validated. + assert await _denied(pm["m"], "store", {"value": str(pm["outside"] / "evil.txt")}) + + +@pytest.mark.asyncio +async def test_read_tool_unrecognized_key_exfil_denied(pm): + # A read-capable tool with the path under an unrecognized key must not exfiltrate. + assert await _denied(pm["m"], "fetch_resource", {"location": str(pm["secret"])}) From e5277a0a9c02a56f009608a0621d7232cf6b8e00 Mon Sep 17 00:00:00 2001 From: ncrispino Date: Tue, 9 Jun 2026 22:13:15 -0700 Subject: [PATCH 3/7] feat(sandbox): configurable SRT read confinement (default: confined to workspace+context) SRT reads are allow-all by default, so the prior secret-denylist still left the whole filesystem readable. Add command_line_srt_read_mode: - confined (default): denyRead=$HOME, allowRead=workspace+context+temp; system paths outside $HOME stay readable so commands still run. Denies personal data, secrets, and other projects. - strict: denyRead='/', allowRead=managed + system runtime baseline + extras. - open: allow-all reads minus a built-in secret denylist + extras. 
Plus command_line_srt_allow_read to widen the allow-list per config. Both params wired through base.py (+validation), the single-source exclusion list, and FilesystemManager. Note: allowRead wins over denyRead in SRT, so protected sub-paths inside an allowed context are read-deniable only in 'open' mode (their write-immunity still holds everywhere). Live-verified: confined denies $HOME reads while python/system reads and workspace I/O keep working. Co-Authored-By: Claude Opus 4.8 (1M context) --- PR_DRAFT_sandboxing.md | 2 +- massgen/backend/_excluded_params.py | 2 + massgen/backend/base.py | 11 +++ .../tools/filesystem/sandbox/srt_sandbox.yaml | 24 ++++-- .../filesystem_manager/_filesystem_manager.py | 6 ++ massgen/filesystem_manager/_srt_manager.py | 74 +++++++++++++++++-- massgen/tests/test_srt_manager.py | 63 ++++++++++++---- 7 files changed, 150 insertions(+), 32 deletions(-) diff --git a/PR_DRAFT_sandboxing.md b/PR_DRAFT_sandboxing.md index a63156d95..c3eecc157 100644 --- a/PR_DRAFT_sandboxing.md +++ b/PR_DRAFT_sandboxing.md @@ -11,7 +11,7 @@ Defense in depth, by design: the OS layer (SRT) and the app layer (`PathPermissi ## What's included ### 1. SRT sandbox mode (`command_line_execution_mode: srt`) -- **`SrtManager`** (`massgen/filesystem_manager/_srt_manager.py`) — derives per-agent SRT settings from `PathPermissionManager.managed_paths`: `allowWrite` = writable paths, `denyWrite`/`denyRead` for read-only/protected paths, **network deny-all by default** (allowlist is opt-in, documented as a capability grant), and a **built-in read-deny baseline for secret stores** (`~/.ssh`, `~/.aws`, `~/.gnupg`, cloud creds, `/etc/shadow`, …) since SRT reads are otherwise allow-all. 
+- **`SrtManager`** (`massgen/filesystem_manager/_srt_manager.py`) — derives per-agent SRT settings from `PathPermissionManager.managed_paths`: `allowWrite` = writable paths, `denyWrite` for read-only/protected paths, **network deny-all by default** (allowlist is opt-in, documented as a capability grant), and a **configurable READ-confinement policy** (`command_line_srt_read_mode`, default `confined`): SRT reads are allow-all by default, so `confined` denies all of `$HOME` and re-allows only the workspace+context (system paths stay readable so commands run); `strict` denies `/` and allows only managed + a system baseline; `open` allows-all minus a secret denylist. `command_line_srt_allow_read` widens it per config. - **Command-line MCP** wraps each executed command: `srt --settings cfg sh -c ''`. - **Filesystem-tools MCP servers** are OS-wrapped too (defense in depth), via the **`sh -c` form** — required because `srt` otherwise consumes the server's `--` separator. **npx/npm launchers (and the no-roots wrapper that spawns npx) auto-skip** wrapping (they need the registry + `~/.npm` writes the sandbox blocks) and keep their app-layer protection. - **Native-sandbox backends degrade `srt`→`local`**: `has_native_execution_sandbox()` (True for `codex` `--full-auto` and `claude_code`) prevents nested Seatbelt/Landlock hangs; the stored config is normalized so downstream raw reads see `local`. 
diff --git a/massgen/backend/_excluded_params.py b/massgen/backend/_excluded_params.py index 2261cc3b1..91344785e 100644 --- a/massgen/backend/_excluded_params.py +++ b/massgen/backend/_excluded_params.py @@ -42,6 +42,8 @@ "command_line_srt_network_allowed_domains", "command_line_srt_deny_read", "command_line_srt_allow_unix_sockets", + "command_line_srt_read_mode", + "command_line_srt_allow_read", "exclude_file_operation_mcps", "use_mcpwrapped_for_tool_filtering", "use_no_roots_wrapper", diff --git a/massgen/backend/base.py b/massgen/backend/base.py index b9f2bb651..d91ef8546 100644 --- a/massgen/backend/base.py +++ b/massgen/backend/base.py @@ -191,6 +191,13 @@ def __init__(self, api_key: str | None = None, **kwargs): f"Invalid command_line_docker_network_mode: '{network_mode}'. Must be 'none', 'bridge', or 'host'.", ) + # Validate SRT read-confinement mode + srt_read_mode = kwargs.get("command_line_srt_read_mode", "confined") + if srt_read_mode not in ["confined", "strict", "open"]: + raise ValueError( + f"Invalid command_line_srt_read_mode: '{srt_read_mode}'. Must be 'confined', 'strict', or 'open'.", + ) + # Extract all FilesystemManager parameters from kwargs filesystem_params = { "cwd": cwd, @@ -215,6 +222,10 @@ def __init__(self, api_key: str | None = None, **kwargs): "command_line_srt_network_allowed_domains": kwargs.get("command_line_srt_network_allowed_domains", []), "command_line_srt_deny_read": kwargs.get("command_line_srt_deny_read", []), "command_line_srt_allow_unix_sockets": kwargs.get("command_line_srt_allow_unix_sockets", []), + # Read confinement: "confined" (default) denies $HOME, re-allows + # managed paths; "strict" denies "/"; "open" allows-all minus secrets. 
+ "command_line_srt_read_mode": srt_read_mode, + "command_line_srt_allow_read": kwargs.get("command_line_srt_allow_read", []), "enable_audio_generation": kwargs.get("enable_audio_generation", False), "exclude_file_operation_mcps": kwargs.get("exclude_file_operation_mcps", False), "use_mcpwrapped_for_tool_filtering": kwargs.get("use_mcpwrapped_for_tool_filtering", False), diff --git a/massgen/configs/tools/filesystem/sandbox/srt_sandbox.yaml b/massgen/configs/tools/filesystem/sandbox/srt_sandbox.yaml index 12765746f..1f62fc95b 100644 --- a/massgen/configs/tools/filesystem/sandbox/srt_sandbox.yaml +++ b/massgen/configs/tools/filesystem/sandbox/srt_sandbox.yaml @@ -17,11 +17,8 @@ # Run with: # uv run massgen --automation --config massgen/configs/tools/filesystem/sandbox/srt_sandbox.yaml \ # "Create out.txt in the workspace, then try to read ~/.ssh/id_rsa" -# Expected: the workspace write succeeds; reading a secret store like ~/.ssh and -# all network egress are denied by the sandbox. (SRT allows general filesystem -# READS by default so commands can run; we deny the well-known secret locations — -# ~/.ssh, ~/.aws, ~/.gnupg, cloud creds, etc. — and all WRITES outside the -# workspace. Add more read-denied paths via command_line_srt_deny_read.) +# Expected: the workspace write succeeds; reading ~/.ssh (or anything else in your +# home dir outside the workspace/context) and all network egress are denied. agents: - id: "sandboxed" @@ -36,6 +33,19 @@ agents: command_line_execution_mode: "srt" # ── Optional SRT knobs (all default-safe) ────────────────────────────── + # READ confinement. Default "confined": denies all of $HOME (personal data, + # secrets, other projects) and re-allows only the workspace + context paths; + # system paths (/usr, /opt, …) stay readable so commands run. 
+ # confined (default) — deny $HOME, allow workspace+context (+ allow_read) + # strict — deny "/", allow only workspace+context+system+extras + # open — allow-all reads except a built-in secret denylist + # command_line_srt_read_mode: "confined" + # + # Extra paths the sandboxed commands may READ (e.g. a shared cache the + # interpreter needs, or a data dir outside the workspace): + # command_line_srt_allow_read: + # - "~/.cache/uv" + # # Network is DENY-ALL by default. Each allowlisted domain is a capability # grant (an allowlisted domain can be an exfiltration channel), so add only # what the sandboxed commands genuinely need: @@ -43,9 +53,9 @@ agents: # - "pypi.org" # - "files.pythonhosted.org" # - # Extra paths to block from reading (on top of the agent's protected paths): + # Extra paths to block from reading (effective outside the allow-read region): # command_line_srt_deny_read: - # - "~/.aws" + # - "/opt/secret-data" # # Unix sockets to allow (advanced; e.g. a local service): # command_line_srt_allow_unix_sockets: [] diff --git a/massgen/filesystem_manager/_filesystem_manager.py b/massgen/filesystem_manager/_filesystem_manager.py index 8b4116faf..43b450c89 100644 --- a/massgen/filesystem_manager/_filesystem_manager.py +++ b/massgen/filesystem_manager/_filesystem_manager.py @@ -174,6 +174,8 @@ def __init__( command_line_srt_network_allowed_domains: list[str] | None = None, command_line_srt_deny_read: list[str] | None = None, command_line_srt_allow_unix_sockets: list[str] | None = None, + command_line_srt_read_mode: str = "confined", + command_line_srt_allow_read: list[str] | None = None, enable_audio_generation: bool = False, enable_file_generation: bool = False, exclude_file_operation_mcps: bool = False, @@ -313,6 +315,8 @@ def __init__( self.command_line_srt_network_allowed_domains = command_line_srt_network_allowed_domains or [] self.command_line_srt_deny_read = command_line_srt_deny_read or [] self.command_line_srt_allow_unix_sockets = 
command_line_srt_allow_unix_sockets or [] + self.command_line_srt_read_mode = command_line_srt_read_mode or "confined" + self.command_line_srt_allow_read = command_line_srt_allow_read or [] # Initialize Docker manager if Docker mode enabled self.docker_manager = None @@ -375,6 +379,8 @@ def __init__( network_allowed_domains=self.command_line_srt_network_allowed_domains, extra_deny_read=self.command_line_srt_deny_read, allow_unix_sockets=self.command_line_srt_allow_unix_sockets, + read_mode=self.command_line_srt_read_mode, + allow_read=self.command_line_srt_allow_read, settings_dir=Path(tempfile.gettempdir()) / "massgen_srt", ) diff --git a/massgen/filesystem_manager/_srt_manager.py b/massgen/filesystem_manager/_srt_manager.py index 86ebec2d9..b76e4aabc 100644 --- a/massgen/filesystem_manager/_srt_manager.py +++ b/massgen/filesystem_manager/_srt_manager.py @@ -61,6 +61,39 @@ ) _DEFAULT_DENY_READ_ABSOLUTE = ("/etc/shadow",) +# Read-confinement modes (SRT reads are allow-all by default; deny-then-allow, where +# allowRead WINS over denyRead). +# "confined" (default): deny all of $HOME, re-allow the agent's managed paths +# (workspace + context + temp). Denies personal data/secrets/other projects; +# system paths (/usr, /opt, …) outside $HOME stay readable so commands run. +# "strict": deny "/", re-allow managed paths + a system runtime baseline + extras. +# Tightest; may break commands that read an unlisted path. +# "open": allow-all reads except the built-in secret denylist + extras. +READ_MODE_CONFINED = "confined" +READ_MODE_STRICT = "strict" +READ_MODE_OPEN = "open" +READ_MODES = (READ_MODE_CONFINED, READ_MODE_STRICT, READ_MODE_OPEN) + +# System roots a sandboxed command needs to READ to run, used by "strict" mode. 
+_STRICT_SYSTEM_READ_BASELINE = ( + "/usr", + "/bin", + "/sbin", + "/etc", + "/opt", + "/dev", + "/tmp", + "/var", + "/private", # macOS: /private/var, /private/tmp + "/System", # macOS + "/Library", # macOS + "/lib", # linux + "/lib64", + "/proc", + "/sys", + "/run", +) + # Profiles ------------------------------------------------------------------- # "execution": tight — reflects exactly what the AGENT may write. # "fs_tools": widened — the fs-tools MCP server also writes temp + snapshot @@ -114,6 +147,8 @@ def __init__( network_allowed_domains: list[str] | None = None, extra_deny_read: list[str] | None = None, allow_unix_sockets: list[str] | None = None, + read_mode: str = READ_MODE_CONFINED, + allow_read: list[str] | None = None, fs_tools_extra_writable: list[str | Path] | None = None, settings_dir: str | Path | None = None, srt_path: str = DEFAULT_SRT_BINARY, @@ -122,6 +157,8 @@ def __init__( self.network_allowed_domains = list(network_allowed_domains or []) self.extra_deny_read = list(extra_deny_read or []) self.allow_unix_sockets = list(allow_unix_sockets or []) + self.read_mode = read_mode if read_mode in READ_MODES else READ_MODE_CONFINED + self.allow_read = list(allow_read or []) self.fs_tools_extra_writable = [Path(p).resolve() for p in (fs_tools_extra_writable or [])] self.settings_dir = Path(settings_dir) if settings_dir else None self.srt_path = srt_path @@ -157,20 +194,41 @@ def build_settings(self, profile: str = EXECUTION_PROFILE) -> dict[str, Any]: deny_write = [str(mp.path) for mp in managed if mp.permission == Permission.READ and str(mp.path) not in writable_set] deny_write += protected_paths - # Reads: SRT defaults to allow-all, so deny the well-known secret stores - # (else a sandboxed `cat ~/.ssh/id_rsa` exfiltrates), plus per-context - # protected paths and any user-configured extras. 
- home = Path.home() - deny_read: list[str] = [str(home / rel) for rel in _DEFAULT_DENY_READ_HOME_RELATIVE] - deny_read += list(_DEFAULT_DENY_READ_ABSOLUTE) - deny_read += protected_paths - deny_read += list(self.extra_deny_read) + # Reads: SRT defaults to allow-all (deny-then-allow; allowRead WINS over + # denyRead). The managed paths the agent may read (re-allowed within any + # denied region) plus user-configured extras: + home = str(Path.home()) + managed_readable = [str(mp.path) for mp in managed] + allow_read = managed_readable + [str(p) for p in self.fs_tools_extra_writable] + list(self.allow_read) + + if self.read_mode == READ_MODE_OPEN: + # Allow-all minus the secret denylist + protected + extras. (No allowRead: + # protected/secret denies stay effective because nothing re-allows them.) + deny_read = [str(Path(home) / rel) for rel in _DEFAULT_DENY_READ_HOME_RELATIVE] + deny_read += list(_DEFAULT_DENY_READ_ABSOLUTE) + deny_read += protected_paths + deny_read += list(self.extra_deny_read) + allow_read = [] + elif self.read_mode == READ_MODE_STRICT: + # Deny everything; re-allow only managed paths + a system runtime baseline + # (so the interpreter/libs are readable) + user extras. + deny_read = ["/"] + list(self.extra_deny_read) + allow_read = sorted(set(allow_read) | set(_STRICT_SYSTEM_READ_BASELINE)) + else: # READ_MODE_CONFINED (default) + # Deny all of $HOME (personal data/secrets/other projects), re-allow the + # agent's managed paths within it; system paths outside $HOME stay + # readable so commands run. NOTE: a protected sub-path inside an + # allow-read context can't be read-denied here (allowRead wins); its + # write-immunity (denyWrite) still holds. 
+ deny_read = [home, *_DEFAULT_DENY_READ_ABSOLUTE] + deny_read += list(self.extra_deny_read) return { "filesystem": { "allowWrite": sorted(writable_set), "denyWrite": sorted(set(deny_write)), "denyRead": sorted(set(deny_read)), + "allowRead": sorted(set(allow_read)), }, "network": { "allowedDomains": list(self.network_allowed_domains), diff --git a/massgen/tests/test_srt_manager.py b/massgen/tests/test_srt_manager.py index ed87bb65c..67654cb9f 100644 --- a/massgen/tests/test_srt_manager.py +++ b/massgen/tests/test_srt_manager.py @@ -76,14 +76,6 @@ def test_execution_profile_temp_and_read_context_are_not_writable(pm_with_paths) assert str(pm_with_paths["ctx_read"]) in deny_write -def test_protected_paths_are_deny_read(pm_with_paths): - mgr = SrtManager(pm_with_paths["pm"], extra_deny_read=["/some/extra/secret"]) - settings = mgr.build_settings(profile="execution") - deny_read = settings["filesystem"]["denyRead"] - assert str(pm_with_paths["protected"]) in deny_read - assert "/some/extra/secret" in deny_read - - def test_protected_paths_are_also_deny_write(pm_with_paths): # Protected paths are immune from modification even inside a writable context. mgr = SrtManager(pm_with_paths["pm"]) @@ -91,17 +83,56 @@ def test_protected_paths_are_also_deny_write(pm_with_paths): assert str(pm_with_paths["protected"]) in settings["filesystem"]["denyWrite"] -def test_secret_stores_are_deny_read_by_default(pm_with_paths): - # SRT reads are allow-all by default; the manager must deny known secret stores - # so a sandboxed `cat ~/.ssh/id_rsa` is blocked. 
+# --------------------------------------------------------------------------- # +# Read-confinement modes (SRT reads are allow-all by default; allowRead WINS) +# --------------------------------------------------------------------------- # +def test_default_read_mode_is_confined(pm_with_paths): + assert SrtManager(pm_with_paths["pm"]).read_mode == "confined" + + +def test_confined_mode_denies_home_allows_managed(pm_with_paths): from pathlib import Path - mgr = SrtManager(pm_with_paths["pm"]) - deny_read = mgr.build_settings(profile="execution")["filesystem"]["denyRead"] + mgr = SrtManager(pm_with_paths["pm"]) # default confined + fs = mgr.build_settings(profile="execution")["filesystem"] + # $HOME denied (covers ~/.ssh, ~/.aws, other projects, personal data)… + assert str(Path.home()) in fs["denyRead"] + assert "/etc/shadow" in fs["denyRead"] + # …but the agent's managed paths are re-allowed (allowRead wins over denyRead). + assert str(pm_with_paths["workspace"]) in fs["allowRead"] + assert str(pm_with_paths["ctx_read"]) in fs["allowRead"] + + +def test_strict_mode_denies_root_allows_managed_and_system(pm_with_paths): + mgr = SrtManager(pm_with_paths["pm"], read_mode="strict") + fs = mgr.build_settings(profile="execution")["filesystem"] + assert fs["denyRead"] == ["/"] + assert str(pm_with_paths["workspace"]) in fs["allowRead"] + assert "/usr" in fs["allowRead"] # system baseline so commands can run + + +def test_open_mode_uses_secret_denylist(pm_with_paths): + from pathlib import Path + + mgr = SrtManager(pm_with_paths["pm"], read_mode="open", extra_deny_read=["/some/extra/secret"]) + fs = mgr.build_settings(profile="execution")["filesystem"] home = Path.home() - for rel in (".ssh", ".aws", ".gnupg", ".config/gcloud"): - assert str(home / rel) in deny_read, f"{rel} should be read-denied by default" - assert "/etc/shadow" in deny_read + for rel in (".ssh", ".aws", ".gnupg"): + assert str(home / rel) in fs["denyRead"] + # protected + extras are read-denied in open 
mode (nothing re-allows them). + assert str(pm_with_paths["protected"]) in fs["denyRead"] + assert "/some/extra/secret" in fs["denyRead"] + assert fs["allowRead"] == [] + + +def test_allow_read_extras_propagate(pm_with_paths): + mgr = SrtManager(pm_with_paths["pm"], allow_read=["/opt/shared-cache"]) + fs = mgr.build_settings(profile="execution")["filesystem"] + assert "/opt/shared-cache" in fs["allowRead"] + + +def test_invalid_read_mode_falls_back_to_confined(pm_with_paths): + assert SrtManager(pm_with_paths["pm"], read_mode="bogus").read_mode == "confined" # --------------------------------------------------------------------------- # From 332abb849789dca8a833767afce65dc7b4f32fb0 Mon Sep 17 00:00:00 2001 From: HenryQi Date: Wed, 10 Jun 2026 14:17:34 +0800 Subject: [PATCH 4/7] feat: v0.1.96 --- ROADMAP_v0.1.96.md => ROADMAP_v0.1.97.md | 0 massgen/__init__.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename ROADMAP_v0.1.96.md => ROADMAP_v0.1.97.md (100%) diff --git a/ROADMAP_v0.1.96.md b/ROADMAP_v0.1.97.md similarity index 100% rename from ROADMAP_v0.1.96.md rename to ROADMAP_v0.1.97.md diff --git a/massgen/__init__.py b/massgen/__init__.py index 1b4853d9e..d4cc7ad95 100644 --- a/massgen/__init__.py +++ b/massgen/__init__.py @@ -86,7 +86,7 @@ from .message_templates import MessageTemplates, get_templates from .orchestrator import Orchestrator, create_orchestrator -__version__ = "0.1.95" +__version__ = "0.1.96" __author__ = "MassGen Contributors" From 9d5125082d32008e61eb6853585ae4bc9fdec40d Mon Sep 17 00:00:00 2001 From: ncrispino Date: Wed, 10 Jun 2026 09:17:33 -0700 Subject: [PATCH 5/7] fix(sandbox): re-allow framework read roots so fs-tools MCP server starts under confined SRT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Live-smoke-testing the srt_sandbox.yaml demo surfaced that the OS-wrapped workspace_tools MCP server failed to start under the default confined read mode: SRT denied reading the server's 
own code because, in a normal dev/editable install, the venv (fastmcp + deps + interpreter), the massgen package source, and git's user config all live under $HOME — exactly the region confined denies. First the server's own _workspace_tools_server.py script was unreadable; after re-allowing the code roots, GitPython's import-time `git version` then failed reading ~/.gitconfig. Either way the server never connected and the agent silently fell back to shell-only (security held — fail-closed — but the filesystem MCP file-op tools were unavailable whenever srt was on). Fix: build_settings() now re-allows the framework's runtime read roots for the fs_tools profile only (sys.prefix/sys.base_prefix, the massgen package dir, and ~/.gitconfig + ~/.config/git). This is framework code/config, not user data, so secrets (.ssh/.aws/...) and other projects stay denied; the agent's own execution profile is untouched. The confined read mode landed in a later commit than the fs-tools wrapping, and the unit tests only checked argv/wiring shape — not a live server handshake — so this slipped through. TDD: added test_fs_tools_profile_confined_allows_reading_framework_runtime (red first) asserting the fs_tools allowRead covers the framework roots + git config while $HOME stays in denyRead, plus test_execution_profile_does_not_widen_for_framework_runtime to keep the agent sandbox tight. Verified live: the agent now creates files via the filesystem MCP tools while a shell read of a $HOME secret is still OS-denied. 
Co-Authored-By: Claude Opus 4.8 --- ROADMAP_v0.1.96.md => ROADMAP_v0.1.97.md | 0 .../v0.1.95.md} | 0 massgen/filesystem_manager/_srt_manager.py | 44 ++++++++++++++ massgen/tests/test_srt_manager.py | 58 +++++++++++++++++++ 4 files changed, 102 insertions(+) rename ROADMAP_v0.1.96.md => ROADMAP_v0.1.97.md (100%) rename docs/announcements/{current-release.md => archive/v0.1.95.md} (100%) diff --git a/ROADMAP_v0.1.96.md b/ROADMAP_v0.1.97.md similarity index 100% rename from ROADMAP_v0.1.96.md rename to ROADMAP_v0.1.97.md diff --git a/docs/announcements/current-release.md b/docs/announcements/archive/v0.1.95.md similarity index 100% rename from docs/announcements/current-release.md rename to docs/announcements/archive/v0.1.95.md diff --git a/massgen/filesystem_manager/_srt_manager.py b/massgen/filesystem_manager/_srt_manager.py index b76e4aabc..620bd904c 100644 --- a/massgen/filesystem_manager/_srt_manager.py +++ b/massgen/filesystem_manager/_srt_manager.py @@ -131,6 +131,40 @@ def srt_available(srt_path: str = DEFAULT_SRT_BINARY) -> bool: return shutil.which(srt_path) is not None +def _framework_read_roots() -> list[str]: + """Read roots the framework's OWN MCP servers need to start under SRT. + + When SRT wraps a framework server (``fastmcp run …``), the + sandbox must be able to READ the server's code, the Python interpreter, the + installed dependencies (fastmcp, mcp, GitPython, …), and the runtime config those + dependencies require at startup. In a typical dev/editable install all of these + live under ``$HOME`` — exactly the region ``confined``/``strict`` deny — so without + re-allowing them ``srt`` denies the read and the server never starts (first the + server's own script, then GitPython's ``git version`` reading ``~/.gitconfig``). 
+ + Returns, all framework code/runtime (NOT user data — these don't widen access to + secrets or other projects): + - ``sys.prefix`` / ``sys.base_prefix`` — interpreter + site-packages (deps) + - the ``massgen`` package directory — the server scripts + - git's user config (``~/.gitconfig``, ``~/.config/git``) — git is core to the + workspace model (snapshots/commits via GitPython); git reads its global config + on essentially every invocation. + """ + import sys + + roots = {sys.prefix, sys.base_prefix} + try: + import massgen + + roots.add(str(Path(massgen.__file__).resolve().parent)) + except Exception: # pragma: no cover - massgen is always importable in practice + pass + home = Path.home() + roots.add(str(home / ".gitconfig")) + roots.add(str(home / ".config" / "git")) + return sorted(roots) + + class SrtManager: """Builds per-agent SRT settings and wraps commands. @@ -201,6 +235,16 @@ def build_settings(self, profile: str = EXECUTION_PROFILE) -> dict[str, Any]: managed_readable = [str(mp.path) for mp in managed] allow_read = managed_readable + [str(p) for p in self.fs_tools_extra_writable] + list(self.allow_read) + # The fs-tools profile wraps a FRAMEWORK MCP server (fastmcp run …). Under confined/strict the server's own code + interpreter + deps + # live in a denied region ($HOME), so re-allow the framework runtime roots or + # the wrapped server can't read its own script and fails to start. This is + # framework code, not user data; the agent's own command sandbox (execution + # profile) stays tight and is unaffected. ("open" re-allows nothing because + # it is allow-all-minus-denylist; the framework roots are readable anyway.) + if profile == FS_TOOLS_PROFILE: + allow_read = allow_read + _framework_read_roots() + if self.read_mode == READ_MODE_OPEN: # Allow-all minus the secret denylist + protected + extras. (No allowRead: # protected/secret denies stay effective because nothing re-allows them.)
diff --git a/massgen/tests/test_srt_manager.py b/massgen/tests/test_srt_manager.py index 67654cb9f..c7d78844e 100644 --- a/massgen/tests/test_srt_manager.py +++ b/massgen/tests/test_srt_manager.py @@ -151,6 +151,64 @@ def test_fs_tools_profile_widens_writes_for_temp_and_snapshot(pm_with_paths, tmp assert str(snapshot.resolve()) in allow_write +def _is_read_allowed(allow_read, target: str) -> bool: + """True if `target` is covered by some allowRead root (itself or an ancestor).""" + from pathlib import Path as _P + + t = _P(target).resolve() + for root in allow_read: + r = _P(root).resolve() + if t == r or r in t.parents: + return True + return False + + +def test_fs_tools_profile_confined_allows_reading_framework_runtime(pm_with_paths): + """REGRESSION: when SRT wraps a framework MCP server (fastmcp run …), + confined mode denies all of $HOME — but the venv (fastmcp + deps + interpreter) and + the massgen package source both live under $HOME. Without re-allowing the framework's + own read roots, `srt` denies reading the server's own code and the server can't start + ("Operation not permitted: _workspace_tools_server.py"). The fs_tools profile must + re-allow the framework runtime roots so the wrapped server can read its own code while + $HOME otherwise stays denied. + """ + import sys + from pathlib import Path + + import massgen + + mgr = SrtManager(pm_with_paths["pm"]) # default confined + fs = mgr.build_settings(profile="fs_tools")["filesystem"] + + # $HOME is still denied (we didn't just open everything back up). + assert str(Path.home()) in fs["denyRead"] + + # The framework's own code + interpreter + deps must be readable (allowRead wins).
+ massgen_dir = Path(massgen.__file__).resolve().parent + assert _is_read_allowed(fs["allowRead"], str(massgen_dir)), "massgen package dir must be readable by the wrapped fs-tools server" + assert _is_read_allowed(fs["allowRead"], sys.prefix), "Python prefix (venv: fastmcp + deps) must be readable" + assert _is_read_allowed(fs["allowRead"], sys.base_prefix), "base Python prefix must be readable" + + # git is core to the workspace model (GitPython reads ~/.gitconfig at import), so + # its user config must be readable too — else the server crashes on import under + # confined ("unable to access '~/.gitconfig': Operation not permitted"). + assert _is_read_allowed(fs["allowRead"], str(Path.home() / ".gitconfig")), "git user config must be readable by the wrapped fs-tools server" + + +def test_execution_profile_does_not_widen_for_framework_runtime(pm_with_paths): + """The framework-runtime re-allow is fs_tools-only: the agent's own command sandbox + (execution profile) stays tight and must NOT gain the massgen package dir just because + fs_tools needs it.""" + from pathlib import Path + + import massgen + + mgr = SrtManager(pm_with_paths["pm"]) # default confined + fs = mgr.build_settings(profile="execution")["filesystem"] + massgen_dir = str(Path(massgen.__file__).resolve().parent) + assert massgen_dir not in fs["allowRead"] + + # --------------------------------------------------------------------------- # # network — deny-all by default, opt-in allowlist # --------------------------------------------------------------------------- # From 4d5c2c1fdee6ed599ccb2062153b0f33a88f9f9d Mon Sep 17 00:00:00 2001 From: ncrispino Date: Wed, 10 Jun 2026 09:18:55 -0700 Subject: [PATCH 6/7] =?UTF-8?q?docs(release):=20prepare=20v0.1.96=20?= =?UTF-8?q?=E2=80=94=20OS-Level=20Agent=20Sandboxing?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Release-prep doc sweep for v0.1.96 (the SRT sandboxing release): - CHANGELOG: full v0.1.96 entry 
(theme, SRT mode, read confinement, hardened permission hook, native-backend degrade, tests, framework-read-roots fix). - README: Latest Features + Recent Achievements rewritten for v0.1.96; v0.1.95 moved to Previous Achievements; TOC anchors + bottom roadmap shifted. - ROADMAP: current version → v0.1.96, new completed section; planned multimodal image/video edit work shifts to v0.1.97 (ROADMAP_v0.1.97.md content). - docs/source/index.rst + massgen/configs/README.md: v0.1.96 release entries. - Announcement: new concise current-release.md (no link in the posted body, pending social links). v0.1.95 announcement was archived in the prior commit. Co-Authored-By: Claude Opus 4.8 --- CHANGELOG.md | 24 ++++++++++++ README.md | 48 ++++++++++++------------ README_PYPI.md | 48 ++++++++++++------------ ROADMAP.md | 28 +++++++++++--- ROADMAP_v0.1.97.md | 7 ++-- docs/announcements/current-release.md | 54 +++++++++++++++++++++++++++ docs/source/index.rst | 4 ++ massgen/configs/README.md | 22 ++++++++++- 8 files changed, 180 insertions(+), 55 deletions(-) create mode 100644 docs/announcements/current-release.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 8504f99d3..e10fa97f1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,30 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.1.96] - 2026-06-10 + +### Theme: OS-Level Agent Sandboxing + +Add a real OS-level execution sandbox for agents via Anthropic's [sandbox-runtime](https://github.com/anthropic-experimental/sandbox-runtime) (`srt`: bubblewrap on Linux, Seatbelt on macOS), and harden the existing application-layer permission hook against file-tool escapes. **Defense in depth by design**: the OS layer (`SrtManager`) and the app layer (`PathPermissionManager`) are derived from the *same* path policy and both stay active — SRT closes the shell escape hatch (e.g. 
`echo x > /etc/passwd`, which never goes through a file tool), the hardened hook closes file-tool escapes (`write_file`/`move`/`copy` to/from outside the workspace). Default-off, one-knob opt-in (`command_line_execution_mode: srt`); current behavior is unchanged unless a config turns it on. All items landed under TDD (tests written first, confirmed red, then green), plus live verification across multiple backends. + +### Added +- **SRT sandbox mode (`command_line_execution_mode: srt`)**: a third command-execution mode alongside `local`/`docker`. `SrtManager` (`massgen/filesystem_manager/_srt_manager.py`) derives per-agent SRT settings from `PathPermissionManager.managed_paths` — `allowWrite` for writable paths, `denyWrite` for read-only/protected paths, **network deny-all by default** (allowlist is opt-in, documented as a capability grant), and a built-in secret-store read-deny baseline. Commands are wrapped as `srt --settings cfg sh -c ''` (the `sh -c` form is required so `srt` does not consume the server's `--` separator). Both the command-line MCP and the filesystem-tools MCP servers are OS-wrapped (defense in depth); npx/npm launchers and the no-roots wrapper auto-skip wrapping (they need registry + `~/.npm` writes the sandbox blocks) and keep their app-layer protection. Example config: `massgen/configs/tools/filesystem/sandbox/srt_sandbox.yaml`. +- **Configurable SRT read confinement (`command_line_srt_read_mode`, default `confined`)**: SRT reads are allow-all by default, so `confined` denies all of `$HOME` (personal data, secrets, other projects) and re-allows only the workspace + context + temp paths while system paths stay readable so commands run; `strict` denies `/` and allows only managed paths + a system runtime baseline + extras; `open` allows-all reads minus a built-in secret denylist + extras. `command_line_srt_allow_read` widens the allow-list per config. 
New backend params `command_line_srt_network_allowed_domains` / `_deny_read` / `_allow_unix_sockets` / `_allow_read` / `_read_mode` added to the single-source exclusion list; `srt` added to the MCP executable allowlist. When the `fs_tools` profile OS-wraps a framework MCP server (`fastmcp run `), the framework's own read roots — the interpreter/site-packages (`sys.prefix`/`sys.base_prefix`), the `massgen` package source, and git's user config (`~/.gitconfig`, `~/.config/git`; git is core to the workspace snapshot model) — are re-allowed so the wrapped server can read its own code/runtime under `confined`/`strict` while user secrets stay denied. The agent's own `execution` profile is unaffected. +- **Subagent SRT inheritance**: subagents inherit the parent's `command_line_srt_*` settings (parity with Docker). + +### Changed +- **Native-sandbox backends degrade `srt`→`local`**: `has_native_execution_sandbox()` (True for `codex` `--full-auto` and `claude_code`) prevents nested Seatbelt/Landlock hangs; the stored config is normalized so downstream raw reads see `local`. + +### Fixed +- **Permission-hook hardening (`PathPermissionManager`)**: new `_validate_no_path_arg_escapes` — a key-agnostic scan that walks the full tool-args tree (nested dicts + lists) and denies any value resolving outside all managed areas. Closes the prior **fail-open** behavior (path under an unrecognized key, list-valued path, or `move`/`copy` `source` pointing outside the workspace) without false positives (non-path strings resolve harmlessly inside the workspace; content keys are skipped). Symlinks/`..` were already handled by `.resolve()`. 
+ +### Tests +- New deterministic suites: `test_srt_manager.py` (settings derivation, profiles, secret read-deny baseline, protected-path read+write deny, wrapping, availability guards), `test_srt_filesystem_integration.py` (command-line + fs-tools config wiring, `sh -c` wrap, npx / no-roots auto-skip, MCP-security validation), `test_srt_backend_degrade.py` (`srt`→`local` degrade for native-sandbox backends; API backends keep `srt`), `test_path_permission_hook_adversarial.py` (15 escape vectors — absolute/`..`/symlink/unrecognized-key/list/nested-dict/move-source/copy-source/read-exfil — plus false-positive guards), and `test_subagent_manager.py::TestSrtSettingsInheritance` (subagent inherits parent SRT settings). +- Live-verified (macOS 15.7, srt 1.0.0): standalone srt (allowed-write, out-of-scope write blocked, deny-all network blocked, secret read blocked); 3 API backends (OpenRouter/`chatcompletion`, OpenAI Responses, Gemini) with workspace write OK and out-of-workspace write/file-tool escape blocked; codex + srt and claude_code + srt degrade to local and complete via their native sandbox. + +### Documentations, Configurations and Resources +- **New Config**: `massgen/configs/tools/filesystem/sandbox/srt_sandbox.yaml` — fully-commented SRT opt-in example with all read/network/socket knobs documented. + ## [0.1.95] - 2026-06-08 ### Theme: Steering Improvements diff --git a/README.md b/README.md index da728d8e9..c71cad92c 100644 --- a/README.md +++ b/README.md @@ -69,7 +69,7 @@ This project started with the "threads of thought" and "iterative refinement" id

🆕 Latest Features

-- [v0.1.94 Features](#-latest-features-v0194) +- [v0.1.96 Features](#-latest-features-v0196)
@@ -122,15 +122,15 @@ This project started with the "threads of thought" and "iterative refinement" id

🗺️ Roadmap

-- [Recent Achievements (v0.1.95)](#recent-achievements-v0195) -- [Previous Achievements (v0.0.3 - v0.1.94)](#previous-achievements-v003---v0194) +- [Recent Achievements (v0.1.96)](#recent-achievements-v0196) +- [Previous Achievements (v0.0.3 - v0.1.95)](#previous-achievements-v003---v0195) - [Key Future Enhancements](#key-future-enhancements) - Bug Fixes & Backend Improvements - Advanced Agent Collaboration - Expanded Model, Tool & Agent Integrations - Improved Performance & Scalability - Enhanced Developer Experience -- [v0.1.96 Roadmap](#v0196-roadmap) +- [v0.1.97 Roadmap](#v0197-roadmap)
@@ -155,18 +155,18 @@ This project started with the "threads of thought" and "iterative refinement" id --- -## 🆕 Latest Features (v0.1.95) +## 🆕 Latest Features (v0.1.96) -**🎉 Released: June 8, 2026** +**🎉 Released: June 10, 2026** -**What's New in v0.1.95** (Steering Improvements): -- **📨 Programmatic Steering Inbox** - Drop human guidance into a streaming agent from `--automation` (no UI) via a file inbox (`--inbox-dir` + `send_steering_message()`), routed through the same chokepoint the TUI/WebUI already use, with per-message targeting. -- **⏯️ Interrupt-and-Resume Steering** - Codex and Antigravity now interrupt the in-flight turn and resume (`codex exec resume` / `agy --continue`) when steering arrives mid-stream, instead of waiting for a round boundary — pre-interrupt work is preserved. -- **🪝 MCP-Hook Injection Parity** - Antigravity gains codex-parity mid-stream injection through the MCP middleware, with `expires_at`-guarded payloads; the Antigravity `--model` flag is now actually wired through. +**What's New in v0.1.96** (OS-Level Agent Sandboxing): +- **🛡️ OS-Level Execution Sandbox** - Confine agent command/code execution at the OS level via Anthropic's [sandbox-runtime](https://github.com/anthropic-experimental/sandbox-runtime) (bubblewrap on Linux, Seatbelt on macOS) with one knob: `command_line_execution_mode: srt`. Filesystem + network isolation derived from the same path policy as the application layer — **defense in depth**, both layers active. +- **🔒 Configurable Read Confinement** - By default (`confined`), sandboxed commands can't read your `$HOME` (secrets, other projects) — only the workspace + context — while system paths stay readable so commands still run. `strict` and `open` modes available. Network is **deny-all by default**; each allowlisted domain is an explicit capability grant. 
+- **🧱 Hardened Permission Hook** - A key-agnostic scan walks the full tool-args tree (nested dicts + lists) and denies any value resolving outside managed areas, closing prior fail-open gaps (unrecognized key, list-valued paths, `move`/`copy` source pointing outside) — with no false positives. -**Install v0.1.95:** +**Install v0.1.96:** ```bash -pip install massgen==0.1.95 +pip install massgen==0.1.96 ``` → [See full release history and examples](massgen/configs/README.md#release-history--examples) @@ -1241,18 +1241,20 @@ MassGen is currently in its foundational stage, with a focus on parallel, asynch ⚠️ **Early Stage Notice:** As MassGen is in active development, please expect upcoming breaking architecture changes as we continue to refine and improve the system. -### Recent Achievements (v0.1.95) +### Recent Achievements (v0.1.96) -**🎉 Released: June 8, 2026** +**🎉 Released: June 10, 2026** -#### Steering Improvements -- **Programmatic Steering Inbox (`--inbox-dir`)**: `send_steering_message()` writes a `msg_*.json` to a caller-known inbox; `RuntimeInboxPoller` routes it through `RuntimeInputDelivery` to the same `set_pending_input` chokepoint the TUI (`_queue_human_input`) and WebUI (`broadcast_response`) use — so `--automation` and any UI-less caller can inject mid-stream human input, with per-message targeting (one / subset / broadcast) -- **Interrupt-and-Resume Steering (Codex & Antigravity)**: steering mid-turn now kills the in-flight turn and resumes (`codex exec resume ` / `agy --continue -p `) rather than waiting for a round boundary; Antigravity promotes pre-interrupt scratch deliverables first so work isn't lost -- **MCP-Server-Hook Payload IPC (Antigravity, codex parity)**: `write_post_tool_use_hook()` / `read_unconsumed_hook_content()` with `expires_at`-guarded payloads consumed by the MCP middleware, so the backend-agnostic per-chunk injection flush works for `agy` the way it does for codex -- **Fixes**: `--inbox-dir` now honored for resumed 
sessions (`--session-id` / config `session_id` / `--continue`), `expires_at`-guarded steering carryforward (both backends), watcher-cleanup failures logged instead of swallowed, round-1 native-hook gap closed, and the Antigravity `--model` flag wired through -- All landed under TDD, with deterministic coverage plus opt-in live-fire tests +#### OS-Level Agent Sandboxing +- **OS-Level Execution Sandbox (`command_line_execution_mode: srt`)**: a third execution mode alongside `local`/`docker` that wraps agent command/code execution in Anthropic's [sandbox-runtime](https://github.com/anthropic-experimental/sandbox-runtime) (bubblewrap on Linux, Seatbelt on macOS). `SrtManager` (`massgen/filesystem_manager/_srt_manager.py`) derives per-agent OS-enforced filesystem + network isolation from the **same** `PathPermissionManager` policy as the app layer — defense in depth, both layers active. Default-off, one-knob opt-in +- **Configurable Read Confinement (`command_line_srt_read_mode`, default `confined`)**: SRT reads are allow-all by default, so `confined` denies all of `$HOME` and re-allows only the workspace + context (system paths stay readable so commands run); `strict` denies `/` and allows only managed + a system baseline; `open` allows-all minus a secret denylist. `command_line_srt_allow_read` widens it per config. 
**Network is deny-all by default** — each allowlisted domain is an explicit capability grant +- **Hardened Permission Hook**: a new key-agnostic scan (`_validate_no_path_arg_escapes`) walks the full tool-args tree (nested dicts + lists) and denies any value resolving outside managed areas, closing prior fail-open gaps (path under an unrecognized key, list-valued paths, `move`/`copy` source pointing outside) without false positives +- **Parity & safety**: native-sandbox backends (Codex `--full-auto`, Claude Code) degrade `srt`→`local` to avoid nested-sandbox hangs; subagents inherit the parent's SRT settings (parity with Docker); live-verified across OpenRouter, OpenAI Responses, and Gemini backends +- All landed under TDD, with a 15-vector adversarial escape suite and an adversarially-verified multi-agent pre-merge review -### Previous Achievements (v0.0.3 - v0.1.94) +### Previous Achievements (v0.0.3 - v0.1.95) + +✅ **Steering Improvements (v0.1.95)**: Extended mid-stream steering from a UI-only capability into a programmatic, headless one — `send_steering_message()` drops guidance into a file inbox (`--inbox-dir`) routed through the same `set_pending_input` chokepoint the TUI/WebUI use — and upgraded Codex/Antigravity to interrupt-and-resume the in-flight turn (`codex exec resume` / `agy --continue`) instead of waiting for a round boundary, with `expires_at`-guarded MCP-hook payload IPC and the Antigravity `--model` flag wired through. 
✅ **Parallelism Hardening — Engineering Health (v0.1.94)**: Moved the peer-context snapshot copy off the event loop (worker thread via `asyncio.to_thread`) backed by immutable, versioned snapshots (`SnapshotVersionStore`) with atomically-repointed symlinks and refcounted readers, eliminating the read-during-write race; fixed lost peer-answer revisions (R1), lost background-subagent results (R2/R3), leaked trace tasks (R4), and cancel-without-await teardown (R5); surfaced worktree-isolation degradation (D2); and unified the mid-stream injection paths (A1). No per-backend functionality changes. @@ -1587,9 +1589,9 @@ MassGen is currently in its foundational stage, with a focus on parallel, asynch We welcome community contributions to achieve these goals. -### v0.1.96 Roadmap +### v0.1.97 Roadmap -Version 0.1.96 picks up the image/video edit work deferred from v0.1.86-v0.1.95 and continues multimodal provider-parity work: +Version 0.1.97 picks up the image/video edit work deferred from v0.1.86-v0.1.96 and continues multimodal provider-parity work: #### Planned Features - **Image/Video Edit Capabilities** ([#959](https://github.com/massgen/MassGen/issues/959)): Image and video editing across providers with multi-turn editing workflows via continuation IDs diff --git a/README_PYPI.md b/README_PYPI.md index cd37ba8e1..91d68cfb0 100644 --- a/README_PYPI.md +++ b/README_PYPI.md @@ -68,7 +68,7 @@ This project started with the "threads of thought" and "iterative refinement" id

🆕 Latest Features

-- [v0.1.94 Features](#-latest-features-v0194) +- [v0.1.96 Features](#-latest-features-v0196)
@@ -121,15 +121,15 @@ This project started with the "threads of thought" and "iterative refinement" id

🗺️ Roadmap

-- [Recent Achievements (v0.1.95)](#recent-achievements-v0195) -- [Previous Achievements (v0.0.3 - v0.1.94)](#previous-achievements-v003---v0194) +- [Recent Achievements (v0.1.96)](#recent-achievements-v0196) +- [Previous Achievements (v0.0.3 - v0.1.95)](#previous-achievements-v003---v0195) - [Key Future Enhancements](#key-future-enhancements) - Bug Fixes & Backend Improvements - Advanced Agent Collaboration - Expanded Model, Tool & Agent Integrations - Improved Performance & Scalability - Enhanced Developer Experience -- [v0.1.96 Roadmap](#v0196-roadmap) +- [v0.1.97 Roadmap](#v0197-roadmap)
@@ -154,18 +154,18 @@ This project started with the "threads of thought" and "iterative refinement" id --- -## 🆕 Latest Features (v0.1.95) +## 🆕 Latest Features (v0.1.96) -**🎉 Released: June 8, 2026** +**🎉 Released: June 10, 2026** -**What's New in v0.1.95** (Steering Improvements): -- **📨 Programmatic Steering Inbox** - Drop human guidance into a streaming agent from `--automation` (no UI) via a file inbox (`--inbox-dir` + `send_steering_message()`), routed through the same chokepoint the TUI/WebUI already use, with per-message targeting. -- **⏯️ Interrupt-and-Resume Steering** - Codex and Antigravity now interrupt the in-flight turn and resume (`codex exec resume` / `agy --continue`) when steering arrives mid-stream, instead of waiting for a round boundary — pre-interrupt work is preserved. -- **🪝 MCP-Hook Injection Parity** - Antigravity gains codex-parity mid-stream injection through the MCP middleware, with `expires_at`-guarded payloads; the Antigravity `--model` flag is now actually wired through. +**What's New in v0.1.96** (OS-Level Agent Sandboxing): +- **🛡️ OS-Level Execution Sandbox** - Confine agent command/code execution at the OS level via Anthropic's [sandbox-runtime](https://github.com/anthropic-experimental/sandbox-runtime) (bubblewrap on Linux, Seatbelt on macOS) with one knob: `command_line_execution_mode: srt`. Filesystem + network isolation derived from the same path policy as the application layer — **defense in depth**, both layers active. +- **🔒 Configurable Read Confinement** - By default (`confined`), sandboxed commands can't read your `$HOME` (secrets, other projects) — only the workspace + context — while system paths stay readable so commands still run. `strict` and `open` modes available. Network is **deny-all by default**; each allowlisted domain is an explicit capability grant. 
+- **🧱 Hardened Permission Hook** - A key-agnostic scan walks the full tool-args tree (nested dicts + lists) and denies any value resolving outside managed areas, closing prior fail-open gaps (unrecognized key, list-valued paths, `move`/`copy` source pointing outside) — with no false positives. -**Install v0.1.95:** +**Install v0.1.96:** ```bash -pip install massgen==0.1.95 +pip install massgen==0.1.96 ``` → [See full release history and examples](massgen/configs/README.md#release-history--examples) @@ -1240,18 +1240,20 @@ MassGen is currently in its foundational stage, with a focus on parallel, asynch ⚠️ **Early Stage Notice:** As MassGen is in active development, please expect upcoming breaking architecture changes as we continue to refine and improve the system. -### Recent Achievements (v0.1.95) +### Recent Achievements (v0.1.96) -**🎉 Released: June 8, 2026** +**🎉 Released: June 10, 2026** -#### Steering Improvements -- **Programmatic Steering Inbox (`--inbox-dir`)**: `send_steering_message()` writes a `msg_*.json` to a caller-known inbox; `RuntimeInboxPoller` routes it through `RuntimeInputDelivery` to the same `set_pending_input` chokepoint the TUI (`_queue_human_input`) and WebUI (`broadcast_response`) use — so `--automation` and any UI-less caller can inject mid-stream human input, with per-message targeting (one / subset / broadcast) -- **Interrupt-and-Resume Steering (Codex & Antigravity)**: steering mid-turn now kills the in-flight turn and resumes (`codex exec resume ` / `agy --continue -p `) rather than waiting for a round boundary; Antigravity promotes pre-interrupt scratch deliverables first so work isn't lost -- **MCP-Server-Hook Payload IPC (Antigravity, codex parity)**: `write_post_tool_use_hook()` / `read_unconsumed_hook_content()` with `expires_at`-guarded payloads consumed by the MCP middleware, so the backend-agnostic per-chunk injection flush works for `agy` the way it does for codex -- **Fixes**: `--inbox-dir` now honored for resumed 
sessions (`--session-id` / config `session_id` / `--continue`), `expires_at`-guarded steering carryforward (both backends), watcher-cleanup failures logged instead of swallowed, round-1 native-hook gap closed, and the Antigravity `--model` flag wired through -- All landed under TDD, with deterministic coverage plus opt-in live-fire tests +#### OS-Level Agent Sandboxing +- **OS-Level Execution Sandbox (`command_line_execution_mode: srt`)**: a third execution mode alongside `local`/`docker` that wraps agent command/code execution in Anthropic's [sandbox-runtime](https://github.com/anthropic-experimental/sandbox-runtime) (bubblewrap on Linux, Seatbelt on macOS). `SrtManager` (`massgen/filesystem_manager/_srt_manager.py`) derives per-agent OS-enforced filesystem + network isolation from the **same** `PathPermissionManager` policy as the app layer — defense in depth, both layers active. Default-off, one-knob opt-in +- **Configurable Read Confinement (`command_line_srt_read_mode`, default `confined`)**: SRT reads are allow-all by default, so `confined` denies all of `$HOME` and re-allows only the workspace + context (system paths stay readable so commands run); `strict` denies `/` and allows only managed + a system baseline; `open` allows-all minus a secret denylist. `command_line_srt_allow_read` widens it per config. 
**Network is deny-all by default** — each allowlisted domain is an explicit capability grant +- **Hardened Permission Hook**: a new key-agnostic scan (`_validate_no_path_arg_escapes`) walks the full tool-args tree (nested dicts + lists) and denies any value resolving outside managed areas, closing prior fail-open gaps (path under an unrecognized key, list-valued paths, `move`/`copy` source pointing outside) without false positives +- **Parity & safety**: native-sandbox backends (Codex `--full-auto`, Claude Code) degrade `srt`→`local` to avoid nested-sandbox hangs; subagents inherit the parent's SRT settings (parity with Docker); live-verified across OpenRouter, OpenAI Responses, and Gemini backends +- All landed under TDD, with a 15-vector adversarial escape suite and an adversarially-verified multi-agent pre-merge review -### Previous Achievements (v0.0.3 - v0.1.94) +### Previous Achievements (v0.0.3 - v0.1.95) + +✅ **Steering Improvements (v0.1.95)**: Extended mid-stream steering from a UI-only capability into a programmatic, headless one — `send_steering_message()` drops guidance into a file inbox (`--inbox-dir`) routed through the same `set_pending_input` chokepoint the TUI/WebUI use — and upgraded Codex/Antigravity to interrupt-and-resume the in-flight turn (`codex exec resume` / `agy --continue`) instead of waiting for a round boundary, with `expires_at`-guarded MCP-hook payload IPC and the Antigravity `--model` flag wired through. 
✅ **Parallelism Hardening — Engineering Health (v0.1.94)**: Moved the peer-context snapshot copy off the event loop (worker thread via `asyncio.to_thread`) backed by immutable, versioned snapshots (`SnapshotVersionStore`) with atomically-repointed symlinks and refcounted readers, eliminating the read-during-write race; fixed lost peer-answer revisions (R1), lost background-subagent results (R2/R3), leaked trace tasks (R4), and cancel-without-await teardown (R5); surfaced worktree-isolation degradation (D2); and unified the mid-stream injection paths (A1). No per-backend functionality changes. @@ -1586,9 +1588,9 @@ MassGen is currently in its foundational stage, with a focus on parallel, asynch We welcome community contributions to achieve these goals. -### v0.1.96 Roadmap +### v0.1.97 Roadmap -Version 0.1.96 picks up the image/video edit work deferred from v0.1.86-v0.1.95 and continues multimodal provider-parity work: +Version 0.1.97 picks up the image/video edit work deferred from v0.1.86-v0.1.96 and continues multimodal provider-parity work: #### Planned Features - **Image/Video Edit Capabilities** ([#959](https://github.com/massgen/MassGen/issues/959)): Image and video editing across providers with multi-turn editing workflows via continuation IDs diff --git a/ROADMAP.md b/ROADMAP.md index 83c850e9b..ee65e85c4 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -1,10 +1,10 @@ # MassGen Roadmap -**Current Version:** v0.1.95 +**Current Version:** v0.1.96 **Release Schedule:** Mondays, Wednesdays, Fridays @ 9am PT -**Last Updated:** June 8, 2026 +**Last Updated:** June 10, 2026 This roadmap outlines MassGen's development priorities for upcoming releases. Each release focuses on specific capabilities with real-world use cases. @@ -42,12 +42,30 @@ Want to contribute or collaborate on a specific track? 
Reach out to the track ow | Release | Target | Feature | Owner | Use Case | |---------|--------|---------|-------|----------| -| **v0.1.96** | TBD | Image/Video Edit Capabilities | @ncrispino | Check and support img/video editing capabilities — deferred from v0.1.86-v0.1.95 ([#959](https://github.com/massgen/MassGen/issues/959)) | +| **v0.1.97** | TBD | Image/Video Edit Capabilities | @ncrispino | Check and support img/video editing capabilities — deferred from v0.1.86-v0.1.96 ([#959](https://github.com/massgen/MassGen/issues/959)) | *All releases ship on MWF @ 9am PT when ready* --- +## ✅ v0.1.96 - OS-Level Agent Sandboxing (Completed) + +**Released:** June 10, 2026 + +### Features +- **SRT Sandbox Mode (`command_line_execution_mode: srt`)**: a third command-execution mode alongside `local`/`docker`. `SrtManager` (`massgen/filesystem_manager/_srt_manager.py`) wraps agent command/code execution in Anthropic's [sandbox-runtime](https://github.com/anthropic-experimental/sandbox-runtime) (bubblewrap on Linux, Seatbelt on macOS), deriving per-agent OS-enforced filesystem + network isolation from the **same** `PathPermissionManager` policy as the app layer (defense in depth). Both the command-line MCP and the filesystem-tools MCP servers are OS-wrapped; npx/npm launchers auto-skip and keep their app-layer protection +- **Configurable Read Confinement (`command_line_srt_read_mode`, default `confined`)**: `confined` denies all of `$HOME` and re-allows only the workspace + context (system paths stay readable); `strict` denies `/` and allows only managed + a system baseline; `open` allows-all minus a secret denylist. `command_line_srt_allow_read` widens it per config. 
Network is deny-all by default — each allowlisted domain is an explicit capability grant +- **Hardened Permission Hook**: `_validate_no_path_arg_escapes` walks the full tool-args tree (nested dicts + lists) and denies any value resolving outside managed areas, closing prior fail-open gaps (unrecognized key, list-valued paths, `move`/`copy` source pointing outside) without false positives +- **Parity & Safety**: native-sandbox backends (Codex `--full-auto`, Claude Code) degrade `srt`→`local` to avoid nested-sandbox hangs; subagents inherit the parent's SRT settings (parity with Docker) + +### Notes +- All items landed under TDD, with a 15-vector adversarial escape suite and an adversarially-verified multi-agent pre-merge review. +- Default-off, one-knob opt-in; current behavior unchanged unless a config sets `command_line_execution_mode: srt`. +- Live-verified (macOS 15.7, srt 1.0.0) across OpenRouter, OpenAI Responses, and Gemini backends. +- Image/Video Edit Capabilities ([#959](https://github.com/massgen/MassGen/issues/959)) remain deferred to v0.1.97. + +--- + ## ✅ v0.1.95 - Steering Improvements (Completed) **Released:** June 8, 2026 @@ -66,7 +84,7 @@ Want to contribute or collaborate on a specific track? Reach out to the track ow ### Notes - All items landed under TDD, with deterministic coverage plus opt-in live-fire tests. -- Image/Video Edit Capabilities ([#959](https://github.com/massgen/MassGen/issues/959)) remain deferred to v0.1.96. +- Image/Video Edit Capabilities ([#959](https://github.com/massgen/MassGen/issues/959)) remain deferred to v0.1.97. --- @@ -382,7 +400,7 @@ Want to contribute or collaborate on a specific track? 
Reach out to the track ow --- -## 📋 v0.1.96 - Image/Video Edit Capabilities (Deferred from v0.1.86-v0.1.95) +## 📋 v0.1.97 - Image/Video Edit Capabilities (Deferred from v0.1.86-v0.1.96) ### Features diff --git a/ROADMAP_v0.1.97.md b/ROADMAP_v0.1.97.md index 05a7f0323..c37c2ff3b 100644 --- a/ROADMAP_v0.1.97.md +++ b/ROADMAP_v0.1.97.md @@ -1,14 +1,14 @@ -# MassGen v0.1.96 Roadmap +# MassGen v0.1.97 Roadmap **Target Release:** TBD ## Overview -Version 0.1.96 picks up the image/video edit work deferred from v0.1.86-v0.1.95 and continues multimodal provider-parity work. +Version 0.1.97 picks up the image/video edit work deferred from v0.1.86-v0.1.96 and continues multimodal provider-parity work. --- -## Feature: Image/Video Edit Capabilities (Deferred from v0.1.86-v0.1.95) +## Feature: Image/Video Edit Capabilities (Deferred from v0.1.86-v0.1.96) **Issue:** [#959](https://github.com/massgen/MassGen/issues/959) **Owner:** @ncrispino @@ -30,6 +30,7 @@ Version 0.1.96 picks up the image/video edit work deferred from v0.1.86-v0.1.95 ## Related Tracks +- **v0.1.96**: OS-Level Agent Sandboxing — `command_line_execution_mode: srt` wraps agent command/code execution in Anthropic's sandbox-runtime (bubblewrap/Seatbelt) with OS-enforced filesystem + network isolation derived from the same `PathPermissionManager` policy as the app layer (defense in depth), configurable read confinement (default `confined`), and a hardened key-agnostic permission-hook escape scan - **v0.1.95**: Steering Improvements — programmatic steering inbox (`--inbox-dir`) routed to the shared `set_pending_input` chokepoint, mid-round interrupt-and-resume for Codex and Antigravity (`codex exec resume` / `agy --continue`), MCP-server-hook payload IPC for Antigravity (codex parity), and the Antigravity `--model` flag wired through - **v0.1.94**: Parallelism Hardening (engineering health) — snapshot copy moved off the event loop with immutable versioned snapshots, lock-free concurrency-race fixes, unified 
mid-stream injection, and worktree-isolation degradation surfaced - **v0.1.93**: CLI package decomposition and pydantic config migration — focused `massgen/cli/` package, construction-time config validation with `Literal`-typed modes, single-source exclusion lists, dead-code removal, and test-signal/type-checking hardening diff --git a/docs/announcements/current-release.md b/docs/announcements/current-release.md new file mode 100644 index 000000000..d7baab5a2 --- /dev/null +++ b/docs/announcements/current-release.md @@ -0,0 +1,54 @@ +# MassGen v0.1.96 Release Announcement (OS-Level Agent Sandboxing) + + + +## Release Summary + +MassGen v0.1.96 — OS-Level Agent Sandboxing! 🚀 Agents that run commands can now be confined at the OS level via Anthropic's [sandbox-runtime](https://github.com/anthropic-experimental/sandbox-runtime) (`srt`), with a hardened permission hook on top. Defense in depth: OS and app layers from the same path policy, both active. Default-off, one knob (`command_line_execution_mode: srt`). + +## Install + +```bash +pip install massgen==0.1.96 +``` + +## Links + +- **Release notes:** https://github.com/massgen/MassGen/releases/tag/v0.1.96 +- **X post:** [TO BE ADDED AFTER POSTING] +- **LinkedIn post:** [TO BE ADDED AFTER POSTING] + +## Posting Notes + +- **Suggested image:** A terminal screenshot of the `srt_sandbox.yaml` demo run — agent writes to its workspace successfully, then an out-of-workspace read (`~/.ssh/id_rsa`) and network egress are both denied with `Operation not permitted`. This is a headless/security feature, so a clean before/after terminal capture beats a TUI screenshot. + +--- + +## Full Announcement (for LinkedIn) + +Copy everything below this line, then append content from `feature-highlights.md`: + +--- + +MassGen v0.1.96 — OS-Level Agent Sandboxing! 🚀 Agents that run commands can now be confined at the OS level, not just by MassGen's permission layer. 
Both layers derive from the same path policy and stay active together, closing the shell and file-tool escape hatches at once. Default-off, opt-in with a single knob. + +**Key Improvements:** + +🛡️ **OS-level execution sandbox** — `command_line_execution_mode: srt` wraps agent command/code execution in Anthropic's sandbox-runtime (bubblewrap on Linux, Seatbelt on macOS) for OS-enforced filesystem + network isolation, derived from the same permission policy as the app layer. Network is deny-all by default. + +🔒 **Configurable read confinement** — by default (`confined`), sandboxed commands can't read your `$HOME` (secrets, other projects), only the workspace + context, while system paths stay readable so commands still run. + +🧱 **Hardened permission hook** — a key-agnostic scan walks the full tool-args tree and denies any path resolving outside managed areas, closing prior fail-open gaps with no false positives. + +**Install:** + +```bash +pip install massgen==0.1.96 +``` + +Feature highlights: + + diff --git a/docs/source/index.rst b/docs/source/index.rst index b4de5c49c..b159dd5f8 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -209,6 +209,10 @@ Key Features Recent Releases --------------- +**v0.1.96 (June 10, 2026)** - OS-Level Agent Sandboxing + +Adds a real OS-level execution sandbox for agents via Anthropic's `sandbox-runtime <https://github.com/anthropic-experimental/sandbox-runtime>`_ (``srt``: bubblewrap on Linux, Seatbelt on macOS) and hardens the application-layer permission hook against file-tool escapes. Defense in depth by design: the OS layer (``SrtManager``) and the app layer (``PathPermissionManager``) derive from the *same* path policy and both stay active — SRT closes the shell escape hatch, the hardened hook closes file-tool escapes. One-knob opt-in (``command_line_execution_mode: srt``), default-off. 
Read confinement defaults to ``confined`` (denies ``$HOME``, allows workspace + context), network is deny-all by default, native-sandbox backends (Codex ``--full-auto``, Claude Code) degrade ``srt``→``local``, and subagents inherit the parent's SRT settings. + **v0.1.95 (June 8, 2026)** - Steering Improvements Extends mid-stream injection into a programmatic, headless capability and upgrades it to true interrupt-and-resume for the CLI backends. A file inbox (``--inbox-dir``) lets ``--automation`` and any UI-less caller drop human guidance into a streaming agent through the same chokepoint the TUI/WebUI use; Codex and Antigravity now interrupt the in-flight turn and resume (``codex exec resume`` / ``agy --continue``) instead of waiting for a round boundary. Adds MCP-server-hook payload IPC for Antigravity (codex parity), wires the Antigravity ``--model`` flag, and fixes ``--inbox-dir`` for resumed sessions plus ``expires_at``-guarded steering carryforward. diff --git a/massgen/configs/README.md b/massgen/configs/README.md index 5075309ec..59e959c09 100644 --- a/massgen/configs/README.md +++ b/massgen/configs/README.md @@ -227,7 +227,27 @@ Most configurations use environment variables for API keys:so ## Release History & Examples -### v0.1.95 - Latest +### v0.1.96 - Latest +**OS-Level Agent Sandboxing:** Real OS-level execution sandbox (Anthropic sandbox-runtime) + hardened permission hook, defense in depth + +**Key Features:** +- **SRT sandbox mode** (`command_line_execution_mode: srt`): wraps agent command/code execution in `srt` (bubblewrap on Linux, Seatbelt on macOS); OS-enforced filesystem + network isolation derived from the same path policy as the app layer. One-knob opt-in, default-off +- **Configurable read confinement** (`command_line_srt_read_mode`, default `confined`): denies `$HOME` and re-allows only the workspace + context; `strict` / `open` modes available. 
Network is deny-all by default +- **Hardened permission hook**: key-agnostic scan walks the full tool-args tree and denies any value resolving outside managed areas, closing prior fail-open gaps without false positives +- **Parity & safety**: native-sandbox backends (Codex `--full-auto`, Claude Code) degrade `srt`→`local`; subagents inherit parent SRT settings + +**Try It:** +```bash +pip install massgen==0.1.96 + +# Prerequisite (one-time): npm install -g @anthropic-ai/sandbox-runtime +uv run massgen --automation \ + --config massgen/configs/tools/filesystem/sandbox/srt_sandbox.yaml \ + "Create out.txt in the workspace, then try to read ~/.ssh/id_rsa" +# Expected: the workspace write succeeds; reading ~/.ssh and all network egress are denied. +``` + +### v0.1.95 **Steering Improvements:** Existing mid-stream steering extended to headless callers + upgraded to interrupt-and-resume **Key Features:** From c7b683df5e8bf87fc48150f41b40363a7bb2f4e1 Mon Sep 17 00:00:00 2001 From: HenryQi Date: Thu, 11 Jun 2026 01:44:28 +0800 Subject: [PATCH 7/7] update docs --- CHANGELOG.md | 3 ++ docs/announcements/github-release-v0.1.94.md | 35 ------------- docs/announcements/github-release-v0.1.95.md | 50 ------------------ docs/announcements/github-release-v0.1.96.md | 53 ++++++++++++++++++++ 4 files changed, 56 insertions(+), 85 deletions(-) delete mode 100644 docs/announcements/github-release-v0.1.94.md delete mode 100644 docs/announcements/github-release-v0.1.95.md create mode 100644 docs/announcements/github-release-v0.1.96.md diff --git a/CHANGELOG.md b/CHANGELOG.md index e10fa97f1..fbbbf48b7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -88,6 +88,9 @@ Strengthen the orchestrator's parallel execution: move blocking snapshot work of ## Recent Releases +**v0.1.96 (June 10, 2026)** - OS-Level Agent Sandboxing +Adds a real OS-level execution sandbox for agents via Anthropic's [sandbox-runtime](https://github.com/anthropic-experimental/sandbox-runtime) (`srt`) and hardens the 
application-layer permission hook against file-tool escapes. The new opt-in `command_line_execution_mode: srt` derives OS-enforced filesystem and network isolation from the same `PathPermissionManager` policy as MassGen's app layer, defaults network to deny-all, confines reads away from `$HOME` by default, degrades `srt` to `local` on native-sandbox backends (Codex `--full-auto`, Claude Code), and preserves subagent parity by inheriting parent SRT settings. + **v0.1.95 (June 8, 2026)** - Steering Improvements Extends mid-stream injection into a programmatic, headless capability and upgrades it to true interrupt-and-resume for the CLI backends. A file inbox (`--inbox-dir`) lets `--automation` and any UI-less caller drop human guidance into a streaming agent through the same chokepoint the TUI/WebUI use; Codex and Antigravity now interrupt the in-flight turn and resume (`codex exec resume` / `agy --continue`) instead of waiting for a round boundary. Adds MCP-server-hook payload IPC for Antigravity (codex parity), wires the Antigravity `--model` flag, and fixes `--inbox-dir` for resumed sessions plus `expires_at`-guarded steering carryforward. diff --git a/docs/announcements/github-release-v0.1.94.md b/docs/announcements/github-release-v0.1.94.md deleted file mode 100644 index 9d392da27..000000000 --- a/docs/announcements/github-release-v0.1.94.md +++ /dev/null @@ -1,35 +0,0 @@ -# 🚀 Release Highlights — v0.1.94 (2026-06-05) - -v0.1.94 — Parallelism Hardening (Engineering Health) — strengthens the orchestrator's parallel execution. It moves the snapshot copy off the event loop so agents keep streaming concurrently, backs it with immutable versioned snapshots that keep the off-loop copy safe, and closes latent concurrency races. No per-backend functionality changes (parity principle). 
- -### ⚡ Snapshot Copy Off the Event Loop -- `FilesystemManager.copy_snapshots_to_temp_workspace` now runs its blocking `rmtree`/`copytree`/scrub on a worker thread via `asyncio.to_thread` -- One agent's snapshot copy no longer stalls every other agent's streaming - -### 🔒 Immutable, Versioned Snapshots -- Each agent's snapshot path `/` is now a symlink to an immutable `/.versions//v` directory -- `save_snapshot` (and the interrupted-turn partial save) publish a fresh version and atomically repoint the symlink instead of rewriting in place -- The peer-context copy `acquire`s (refcounts) the current version for the duration of its copy; GC never deletes a pinned or in-flight version -- Eliminates the read-during-write race the off-loop copy would otherwise expose — coordinated by the new `SnapshotVersionStore` - -### 🧵 Concurrency Correctness Fixes -- **R1** — lost peer-answer revision across the injection `await` window (counts now captured at selection time) -- **R2/R3** — lost background-subagent result from a blind queue `pop` (consume only the consumed ids) -- **R4** — leaked background trace-analyzer tasks on cleanup (cancelled before the flush) -- **R5** — cancel-without-await teardown (`cancel_all_subagents` now awaits cancellations against the live registry) -- **D2** — worktree-isolation degradation never surfaced because `emit_status` was called with an invalid `status=` kwarg whose `TypeError` was swallowed -- **D3** — changedoc enrichment made non-fatal - -### 🧩 Unified Mid-Stream Injection -- The two ~150-line per-backend `get_injection_content` closures collapsed into one `build_midstream_injection(..., native=)`, preserving the `update_context → refresh_checklist` side-effect order on both paths -- The triplicated background-wait interrupt provider consolidated into one helper - -### 🧪 Tests -- New race/regression suites driven under TDD with cost-free simulation: `test_concurrency_race_fixes.py`, `test_snapshot_version_store.py`, 
`test_snapshot_versioned_save.py`, `test_snapshot_copy_offload.py`, `test_midstream_injection_unified.py`, `test_wait_interrupt_provider.py` - ---- - -### 📖 Install -```bash -pip install massgen==0.1.94 -``` diff --git a/docs/announcements/github-release-v0.1.95.md b/docs/announcements/github-release-v0.1.95.md deleted file mode 100644 index 6035a24cf..000000000 --- a/docs/announcements/github-release-v0.1.95.md +++ /dev/null @@ -1,50 +0,0 @@ -# 🚀 Release Highlights — v0.1.95 (2026-06-08) - -v0.1.95 — Steering Improvements — extends mid-stream injection from a UI-only capability into a programmatic, headless one, and upgrades it from inject-at-next-boundary into true interrupt-and-resume for the CLI backends. A human (or any UI-less caller) can now drop guidance into an agent *while it is streaming* — over a file inbox in `--automation`, or through the MCP-middleware hook path — and Codex/Antigravity will interrupt the in-flight turn, fold the steering in, and resume rather than restart. The injection chokepoint stays shared across TUI, WebUI, and the new headless path. 
- -### 📨 Programmatic Steering Inbox (`--inbox-dir`) -- `send_steering_message()` (`massgen/steering.py`) drops a `msg_*.json` into a caller-known inbox directory -- `RuntimeInboxPoller` routes it through `RuntimeInputDelivery.poll_runtime_inbox` to the same `set_pending_input` chokepoint the TUI (`_queue_human_input`) and WebUI (`broadcast_response`) already use -- Reachable from `--automation` and any UI-less caller, with per-message targeting (one agent / a subset / broadcast); the resolved inbox is announced as `RUNTIME_INBOX:` in automation output - -### ⏯️ Interrupt-and-Resume Steering (Codex & Antigravity) -- When steering arrives mid-turn, the watcher kills the in-flight turn and resumes — `codex exec resume ` for Codex, `agy --continue -p ` for Antigravity — folding the steering in without waiting for a round boundary -- Antigravity promotes pre-interrupt scratch deliverables to the workspace first, so work done before the interrupt isn't lost -- Gated by `supports_interrupt_resume()` with `interrupt_poll_seconds` / `max_interrupts_per_turn` knobs - -### 🪝 MCP-Server-Hook Payload IPC (Antigravity, codex parity) -- `write_post_tool_use_hook()` / `read_unconsumed_hook_content()` with `expires_at`-guarded payloads consumed by the MCP middleware (`massgen/mcp_tools/hook_middleware.py`) -- The backend-agnostic per-chunk injection flush now works for `agy` the same way it does for codex -- The Antigravity `--model` flag is now actually passed to `agy` (was previously resolved but omitted) - -### 🔧 Bug Fixes -- **`--inbox-dir` honored for all session modes**: the env-var export lived inside the new-session branch, so `--session-id` / config `session_id` / `--continue` runs silently dropped programmatic steering — now hoisted into `_resolve_runtime_inbox()` before the branch -- **Stale steering carryforward**: `read_unconsumed_hook_content()` now drops payloads past `expires_at` (fail-open on malformed values), so a stale hook can't trigger an unexpected 
interrupt/resume — both backends -- **Swallowed watcher failures**: interrupt/resume cleanup now logs non-cancellation failures at debug instead of passing — both backends -- **Round-1 native-hook gap (Antigravity)**: `hook_dir` is set at orchestrator fetch time so first-round hooks are wired before the initial stream; middleware `hook_dir` coerced to `Path` - -### 🧪 Tests -- New deterministic suites: `test_steering_inbox.py`, `test_codex_interrupt_resume.py`, `test_mcp_hook_middleware.py`, `test_live_proc_io.py`; expanded `test_antigravity_cli_backend.py` -- New opt-in live-fire tests (`@pytest.mark.live_api`): `test_steering_live.py`, `test_codex_interrupt_resume_live.py`, `test_antigravity_interrupt_resume_live.py`, `test_codex_middleware_firing_live.py`, `test_codex_hook_firing_live.py` — with non-blocking stdout polling so a buffering child can't hang the test - ---- - -### 📖 Install -```bash -pip install massgen==0.1.95 -``` - -### 🧭 Try It — headless steering -```bash -# Start a run, exposing a file inbox for programmatic steering -uv run massgen --automation --inbox-dir /tmp/inbox \ - --config massgen/configs/debug/codex_mcp_middleware_test.yaml "Write and refine a short essay." 
- -# In another shell, drop a steering message mid-stream: -python -c "from massgen.steering import send_steering_message; send_steering_message('/tmp/inbox', 'prioritize concision')" -``` - -## What's Changed -* feat: Programmatic mid-stream injection and switch to resume by @ncrispino in https://github.com/massgen/MassGen/pull/1114 - -**Full Changelog**: https://github.com/massgen/MassGen/compare/v0.1.94...v0.1.95 diff --git a/docs/announcements/github-release-v0.1.96.md b/docs/announcements/github-release-v0.1.96.md new file mode 100644 index 000000000..a66814437 --- /dev/null +++ b/docs/announcements/github-release-v0.1.96.md @@ -0,0 +1,53 @@ +# 🚀 Release Highlights — v0.1.96 (2026-06-10) + +v0.1.96 — OS-Level Agent Sandboxing — adds an opt-in OS sandbox for agent command execution via Anthropic's [sandbox-runtime](https://github.com/anthropic-experimental/sandbox-runtime) (`srt`: bubblewrap on Linux, Seatbelt on macOS), and hardens MassGen's application-layer permission hook against file-tool escapes. Defense in depth by design: both layers derive from the same path policy and stay active together. Default-off, one knob: `command_line_execution_mode: srt`. 
+ +### 🛡️ OS-Level Execution Sandbox +- New `command_line_execution_mode: srt` wraps agent command/code execution in `srt` +- `SrtManager` derives per-agent settings from `PathPermissionManager.managed_paths`: writable paths become `allowWrite`, read-only/protected paths become `denyWrite` +- Command-line MCP execution and filesystem-tools MCP servers are both OS-wrapped where the launcher supports it + +### 🔒 Read & Network Confinement +- `command_line_srt_read_mode` defaults to `confined`: deny `$HOME`, then re-allow only workspace + context paths while keeping system runtime paths readable +- `strict` and `open` read modes are available for tighter or broader policies +- Network is deny-all by default; `command_line_srt_network_allowed_domains` is an explicit capability grant +- Built-in secret-store read denies are active, with `command_line_srt_deny_read` and `command_line_srt_allow_read` for config-specific adjustments + +### 🧱 Permission-Hook Hardening +- `PathPermissionManager` now scans the full tool-argument tree, not just known path keys +- Blocks escapes through unrecognized keys, nested dicts/lists, `move`/`copy` sources, absolute paths, `..`, and symlinks resolving outside managed areas +- Keeps false positives low by skipping content-like fields and resolving non-path strings inside the workspace + +### ⚙️ Backend Parity & Degrade Behavior +- Native-sandbox backends (Codex `--full-auto`, Claude Code) degrade `srt` to `local` to avoid nested sandbox hangs +- Subagents inherit parent `command_line_srt_*` settings, matching Docker inheritance behavior +- Framework MCP read roots are re-allowed under confined/strict profiles so wrapped filesystem-tool servers can read their own runtime while user secrets remain denied + +### 🧪 Tests +- New deterministic suites: `test_srt_manager.py`, `test_srt_filesystem_integration.py`, `test_srt_backend_degrade.py`, `test_path_permission_hook_adversarial.py` +- Expanded `test_subagent_manager.py` with SRT settings 
inheritance coverage +- Live-verified on macOS 15.7 with `srt` 1.0.0 across standalone SRT, OpenRouter/chatcompletion, OpenAI Responses, Gemini, Codex, and Claude Code paths + +--- + +### 📦 Install +```bash +pip install massgen==0.1.96 +``` + +### ▶️ Try It — SRT sandboxing +```bash +# Prerequisite: +npm install -g @anthropic-ai/sandbox-runtime + +uv run massgen --automation \ + --config massgen/configs/tools/filesystem/sandbox/srt_sandbox.yaml \ + "Create out.txt in the workspace, then try to read ~/.ssh/id_rsa" +``` + +Expected: the workspace write succeeds; out-of-scope reads and network egress are denied by the OS sandbox. + +## What's Changed +* feat: OS-level SRT agent sandboxing + permission-hook hardening by @ncrispino in https://github.com/massgen/MassGen/pull/1125 + +**Full Changelog**: https://github.com/massgen/MassGen/compare/v0.1.95...v0.1.96