diff --git a/src/benchflow/providers/runtime.py b/src/benchflow/providers/runtime.py index 750f2291..96a294a1 100644 --- a/src/benchflow/providers/runtime.py +++ b/src/benchflow/providers/runtime.py @@ -115,9 +115,12 @@ def _docker_host_address() -> str: return "host.docker.internal" -# Sandbox environments where the agent runs on the same host as the proxy -# (the host's loopback / docker bridge is reachable from the agent process). -_LOCAL_REACHABLE_ENVIRONMENTS = {"docker", "local", "host", ""} +# Remote cloud sandbox environments where the agent runs on a *different* +# machine than the host proxy. The canonical environment set produced by the +# runtime is {docker, daytona, modal}; these are the ones a host-bound proxy +# cannot be reached from. Any other (unknown) value is treated conservatively +# as reachable — see ``host_proxy_reachable_from_agent``. +_REMOTE_UNREACHABLE_ENVIRONMENTS = {"daytona", "modal"} def host_proxy_reachable_from_agent(environment: str) -> bool: @@ -128,20 +131,33 @@ def host_proxy_reachable_from_agent(environment: str) -> bool: - ``docker``: the container reaches the host via the docker bridge / ``host.docker.internal``. - - ``local``/``host``: the agent runs directly on the host. - Remote cloud sandboxes (e.g. ``daytona``) run the agent on a different - machine. ``127.0.0.1`` there is the *sandbox's* own loopback, and the - Daytona SSH gateway rejects ``ssh -R`` reverse tunnels, so there is no - address that routes back to the host proxy. + Remote cloud sandboxes (``daytona``, ``modal``) run the agent on a + different machine. ``127.0.0.1`` there is the *sandbox's* own loopback, + and the Daytona SSH gateway rejects ``ssh -R`` reverse tunnels, so there + is no address that routes back to the host proxy. + + An unrecognized environment is treated as reachable (conservative: assume + same-host so the proxy is still wired up rather than silently skipped). 
""" - return environment in _LOCAL_REACHABLE_ENVIRONMENTS + return environment not in _REMOTE_UNREACHABLE_ENVIRONMENTS def _bedrock_proxy_command( *, environment: str, ) -> str: + """Return the address the agent uses to reach a host-bound proxy. + + Precondition: ``host_proxy_reachable_from_agent(environment)`` is True — + this is only ever reached for environments from which the host proxy is + reachable. The reachability predicate above is the single gate that + decides whether a host proxy is usable at all. + """ + assert host_proxy_reachable_from_agent(environment), ( + f"_bedrock_proxy_command called for unreachable environment " + f"{environment!r}; host_proxy_reachable_from_agent must gate this" + ) if environment == "docker": return _docker_host_address() return BEDROCK_PROXY_LOCAL_HOST @@ -401,11 +417,33 @@ async def ensure_bedrock_proxy_runtime( runtime: ProviderRuntime | None, environment: str, ) -> tuple[dict[str, str], ProviderRuntime | None]: - """Start the host-side Bedrock proxy if needed and wire env vars to it.""" + """Start the host-side Bedrock proxy if needed and wire env vars to it. + + Unlike the usage telemetry proxy (pure telemetry — safe to skip when the + agent cannot reach the host), the Bedrock proxy is *load-bearing*: it + translates Anthropic/OpenAI requests into AWS Bedrock Converse calls and + signs them with host AWS credentials. There is no direct path for the + agent to reach Bedrock without it. So on a remote sandbox where the host + proxy is unreachable, the run cannot succeed — we fail fast here with an + actionable error instead of injecting an unreachable ``127.0.0.1`` base + URL that would surface as an opaque mid-run ``ECONNREFUSED``. 
+ """ if not needs_provider_runtime(model): return agent_env, runtime assert model is not None + if not host_proxy_reachable_from_agent(environment): + if runtime is not None: + await stop_provider_runtime(runtime) + raise RuntimeError( + f"Bedrock-routed models are not supported on the " + f"'{environment}' sandbox: the host-side Bedrock proxy that " + f"translates and signs requests to AWS Bedrock binds to the " + f"host machine and is unreachable from the agent, which runs " + f"on a separate remote host. Re-run with '--sandbox docker', " + f"or select a model that is not routed through AWS Bedrock." + ) + if runtime is None: backend_model = strip_provider_prefix(model) frontend_model = _bedrock_frontend_model( diff --git a/tests/test_provider_runtime.py b/tests/test_provider_runtime.py index dc1b48f6..829d8244 100644 --- a/tests/test_provider_runtime.py +++ b/tests/test_provider_runtime.py @@ -222,3 +222,77 @@ async def test_stop_provider_runtime_stops_server(self): ) await stop_provider_runtime(runtime) server.stop.assert_awaited_once() + + +class TestBedrockProxyRemoteSandbox: + """The Bedrock proxy is load-bearing; on a remote sandbox where the host + proxy is unreachable the run must fail fast rather than inject an + unreachable 127.0.0.1 base URL (the Daytona telemetry-proxy twin bug).""" + + @pytest.mark.asyncio + @pytest.mark.parametrize("environment", ["daytona", "modal"]) + async def test_bedrock_on_remote_sandbox_fails_fast(self, environment): + with pytest.raises(RuntimeError, match="not supported on the"): + await ensure_bedrock_proxy_runtime( + agent="claude-agent-acp", + agent_env={ + "AWS_BEARER_TOKEN_BEDROCK": "bedrock-token", + "AWS_REGION": "us-east-1", + }, + model="aws-bedrock/anthropic.claude-haiku-4-5-20251001-v1:0", + runtime=None, + environment=environment, + ) + + @pytest.mark.asyncio + async def test_remote_sandbox_error_is_actionable(self): + with pytest.raises(RuntimeError) as exc: + await ensure_bedrock_proxy_runtime( + 
agent="codex-acp", + agent_env={"AWS_REGION": "us-east-1"}, + model="aws-bedrock/openai.gpt-oss-20b-1:0", + runtime=None, + environment="daytona", + ) + message = str(exc.value) + assert "daytona" in message + assert "--sandbox docker" in message + + @pytest.mark.asyncio + async def test_non_bedrock_model_on_remote_sandbox_is_noop(self): + # A non-Bedrock model never needs the proxy, so a remote sandbox is fine. + agent_env = {"ANTHROPIC_API_KEY": "sk-test"} + updated, runtime = await ensure_bedrock_proxy_runtime( + agent="claude-agent-acp", + agent_env=agent_env, + model="claude-haiku-4-5", + runtime=None, + environment="daytona", + ) + assert updated == agent_env + assert runtime is None + + @pytest.mark.asyncio + async def test_stale_runtime_stopped_when_environment_unreachable(self): + server = AsyncMock() + runtime = ProviderRuntime( + kind="aws-bedrock", + host="host.docker.internal", + port=8099, + server=server, + ) + with pytest.raises(RuntimeError, match="not supported on the"): + await ensure_bedrock_proxy_runtime( + agent="claude-agent-acp", + agent_env={"AWS_REGION": "us-east-1"}, + model="aws-bedrock/anthropic.claude-haiku-4-5-20251001-v1:0", + runtime=runtime, + environment="modal", + ) + server.stop.assert_awaited_once() + + def test_bedrock_proxy_command_rejects_unreachable_environment(self): + from benchflow.providers.runtime import _bedrock_proxy_command + + with pytest.raises(AssertionError): + _bedrock_proxy_command(environment="daytona") diff --git a/tests/test_usage_proxy.py b/tests/test_usage_proxy.py index 701ed991..4237f634 100644 --- a/tests/test_usage_proxy.py +++ b/tests/test_usage_proxy.py @@ -544,12 +544,14 @@ async def stop(self): def test_host_proxy_reachable_only_for_local_environments(): from benchflow.providers.runtime import host_proxy_reachable_from_agent + # docker reaches the host via the docker bridge / host.docker.internal. 
assert host_proxy_reachable_from_agent("docker") is True - assert host_proxy_reachable_from_agent("local") is True - assert host_proxy_reachable_from_agent("host") is True - assert host_proxy_reachable_from_agent("") is True + # Remote cloud sandboxes run the agent on a separate machine. assert host_proxy_reachable_from_agent("daytona") is False assert host_proxy_reachable_from_agent("modal") is False + # Unrecognized environments are treated conservatively as reachable. + assert host_proxy_reachable_from_agent("") is True + assert host_proxy_reachable_from_agent("some-future-local-env") is True def test_total_tokens_is_sum_of_parts():