diff --git a/README.md b/README.md index 61625a1..117267a 100644 --- a/README.md +++ b/README.md @@ -94,6 +94,7 @@ You can specify a particular environment with the ```--env ``` flag - `playwright`: Runs the browser locally using Playwright. - `browserbase`: Connects to a Browserbase instance. +- `agentcore`: Connects to Amazon Bedrock AgentCore Browser. **Local Playwright** @@ -117,6 +118,43 @@ Runs the agent using Browserbase as the browser backend. Ensure the proper Brows python main.py --query="Go to Google and type 'Hello World' into the search bar" --env="browserbase" ``` +**Amazon Bedrock AgentCore** + +Runs the agent using Amazon Bedrock AgentCore Browser as the backend. Requires AWS credentials configured and the `bedrock-agentcore` Python package installed. + +```bash +python main.py --query="Search for great deals on Alexa devices" --env="agentcore" +``` + +The AWS region is automatically detected from your AWS configuration (environment variables, ~/.aws/config, or IAM role). You can override it by setting: + +```bash +export AWS_REGION="us-east-1" +``` + +**Session Recording (AgentCore only)** + +Enable session recording to S3 for replay and debugging: + +```bash +# Auto-create IAM role (recommended) +python main.py --query="Search for great deals on Alexa devices" --env="agentcore" \ + --recording_bucket="my-recordings-bucket" \ + --create_execution_role + +# Or provide existing role +python main.py --query="Search for great deals on Alexa devices" --env="agentcore" \ + --recording_bucket="my-recordings-bucket" \ + --recording_prefix="sessions" \ + --execution_role_arn="arn:aws:iam::123456789012:role/AgentCoreRecordingRole" +``` + +The auto-created role is scoped to the specified S3 bucket/prefix with minimal permissions: +- Trust policy: `bedrock-agentcore.amazonaws.com` +- S3 permissions: `s3:PutObject`, `s3:ListMultipartUploadParts`, `s3:AbortMultipartUpload` + +Recordings can be viewed using the AgentCore session replay viewer. 
+ ## Agent CLI The `main.py` script is the command-line interface (CLI) for running the browser agent. @@ -126,9 +164,11 @@ The `main.py` script is the command-line interface (CLI) for running the browser | Argument | Description | Required | Default | Supported Environment(s) | |-|-|-|-|-| | `--query` | The natural language query for the browser agent to execute. | Yes | N/A | All | -| `--env` | The computer use environment to use. Must be one of the following: `playwright`, or `browserbase` | No | N/A | All | +| `--env` | The computer use environment to use. Must be one of the following: `playwright`, `browserbase`, or `agentcore` | No | playwright | All | | `--initial_url` | The initial URL to load when the browser starts. | No | https://www.google.com | All | | `--highlight_mouse` | If specified, the agent will attempt to highlight the mouse cursor's position in the screenshots. This is useful for visual debugging. | No | False (not highlighted) | `playwright` | +| `--recording_bucket` | S3 bucket name for session recording (bucket name only, not ARN). Example: `my-recordings-bucket` | No | None | `agentcore` | +| `--recording_prefix` | S3 prefix for session recordings. | No | recordings | `agentcore` | ### Environment Variables @@ -137,3 +177,4 @@ The `main.py` script is the command-line interface (CLI) for running the browser | GEMINI_API_KEY | Your API key for the Gemini model. | Yes | | BROWSERBASE_API_KEY | Your API key for Browserbase. | Yes (when using the browserbase environment) | | BROWSERBASE_PROJECT_ID | Your Project ID for Browserbase. | Yes (when using the browserbase environment) | +| AWS_REGION | AWS region for AgentCore Browser. 
class AgentCoreComputer(PlaywrightComputer):
    """Playwright computer backed by an Amazon Bedrock AgentCore Browser session.

    The session is started through ``BrowserClient`` and driven over a CDP
    connection. When a recording bucket is supplied, a browser resource with
    S3 session recording enabled is looked up or created first.
    """

    def __init__(
        self,
        screen_size: tuple[int, int],
        initial_url: str = "https://www.google.com",
        recording_bucket: str | None = None,
        recording_prefix: str = "recordings",
        execution_role_arn: str | None = None,
        create_execution_role: bool = False,
        browser_identifier: str | None = None,
        region: str | None = None,
    ):
        """Store the session configuration; no AWS or browser session is started here.

        Args:
            screen_size: (width, height) applied to the page viewport.
            initial_url: URL opened once the session is connected.
            recording_bucket: S3 bucket for session recording; recording is
                only configured when this is set.
            recording_prefix: S3 prefix for session recordings.
            execution_role_arn: IAM role ARN handed to the recording setup.
            create_execution_role: Let the recording setup create the role
                when no ARN is given.
            browser_identifier: Browser identifier or browser ID ("br-...").
                Falls back to AGENTCORE_BROWSER_IDENTIFIER, then
                "aws.browser.v1".
            region: AWS region. Falls back to AGENTCORE_REGION, AWS_REGION,
                the boto3 session region, then "us-west-2".
        """
        from boto3.session import Session

        super().__init__(screen_size, initial_url)
        self._recording_bucket: str | None = recording_bucket
        self._recording_prefix: str = recording_prefix
        self._execution_role_arn: str | None = execution_role_arn
        self._create_execution_role: bool = create_execution_role
        self._browser_identifier: str = (
            browser_identifier or
            os.getenv("AGENTCORE_BROWSER_IDENTIFIER", "aws.browser.v1")
        )
        # Determine region with fallback chain
        boto_region = Session().region_name
        self._region: str = (
            region
            or os.getenv("AGENTCORE_REGION")
            or os.getenv("AWS_REGION")
            or (boto_region if isinstance(boto_region, str) else None)
            or "us-west-2"
        )
        self._created_browser: bool = False
        self._client = None

    def _resolve_browser_identifier(self) -> str:
        """Return the identifier to start the session with, setting up recording if requested."""
        if not self._recording_bucket:
            return self._browser_identifier

        # A browser ID ("br-...") already names a concrete browser resource,
        # so it is used as-is instead of creating a recording browser.
        if self._browser_identifier.startswith("br-"):
            termcolor.cprint(
                f"Using provided browser ID: {self._browser_identifier}",
                color="cyan"
            )
            return self._browser_identifier

        # Derive the browser name from bucket and prefix so each recording
        # configuration gets its own browser resource.
        import hashlib
        config_hash = hashlib.sha256(
            f"{self._recording_bucket}/{self._recording_prefix}".encode()
        ).hexdigest()[:8]
        browser_name = f"recording_{config_hash}"

        self._execution_role_arn, browser_id = utils.setup_browser_recording(
            browser_name,
            self._browser_identifier,
            self._recording_bucket,
            self._recording_prefix,
            self._execution_role_arn,
            self._create_execution_role,
            self._region,
        )
        # Sessions must be started against the custom browser ID.
        return browser_id

    def __enter__(self):
        """Start the AgentCore session, connect Playwright over CDP and open the initial URL.

        If any step after the client is created fails, everything acquired so
        far is released before the error is re-raised.
        """
        from bedrock_agentcore.tools.browser_client import BrowserClient

        print("Creating AgentCore browser session...")

        region = self._region
        browser_identifier_to_use = self._resolve_browser_identifier()

        self._client = BrowserClient(region)
        try:
            session_id = self._client.start(
                identifier=browser_identifier_to_use,
                name="gemini-browser-session"
            )
            print(f"AgentCore browser session started: {session_id}")

            ws_url, headers = self._client.generate_ws_headers()

            self._playwright = sync_playwright().start()
            self._browser = self._playwright.chromium.connect_over_cdp(
                ws_url,
                headers=headers
            )
            self._context = self._browser.contexts[0]
            self._page = self._context.pages[0]

            # Set viewport explicitly (CDP connection doesn't inherit from session config)
            self._page.set_viewport_size({
                "width": self._screen_size[0],
                "height": self._screen_size[1]
            })

            self._page.goto(self._initial_url)

            self._context.on("page", self._handle_new_page)
        except BaseException:
            # __exit__ is not invoked when __enter__ raises, so the remote
            # session and the Playwright driver have to be released here.
            # Teardown errors are only logged so the original error surfaces.
            self._release_resources()
            raise

        termcolor.cprint(
            f"AgentCore browser session started in {region}",
            color="green",
            attrs=["bold"],
        )

        return self

    def _release_resources(self) -> list[Exception]:
        """Run every teardown step and return the errors raised along the way.

        Each step is attempted even if an earlier one failed. Handles that
        were never created are skipped.
        """
        steps = (
            ("page", getattr(self, "_page", None), "close"),
            ("context", getattr(self, "_context", None), "close"),
            ("browser", getattr(self, "_browser", None), "close"),
            ("AgentCore session", self._client, "stop"),
            ("playwright", getattr(self, "_playwright", None), "stop"),
        )
        errors: list[Exception] = []
        for label, resource, method_name in steps:
            if not resource:
                continue
            try:
                getattr(resource, method_name)()
            except Exception as err:
                termcolor.cprint(
                    f"Failed to release {label}: {err}", color="yellow"
                )
                errors.append(err)
        return errors

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Release page, context, browser, AgentCore session and Playwright, in that order.

        A teardown error is re-raised only when the ``with`` body itself
        succeeded, so it never masks the exception that ended the body.
        """
        errors = self._release_resources()
        termcolor.cprint(
            "AgentCore browser session stopped",
            color="green",
            attrs=["bold"],
        )
        if errors and exc_type is None:
            raise errors[0]
management.""" + +import json +import time +import hashlib +import termcolor + + +def create_recording_role( + browser_identifier: str, recording_bucket: str, recording_prefix: str, region: str +) -> str: + """Create IAM role scoped to recording bucket. + + Args: + browser_identifier: Browser identifier (e.g., "aws.browser.v1") + recording_bucket: S3 bucket name for recordings + recording_prefix: S3 prefix for recordings + region: AWS region + + Returns: + ARN of the created or existing IAM role + """ + import boto3 + + # IAM is global, but STS should use the specified region + iam = boto3.client("iam", region_name=region) + sts = boto3.client("sts", region_name=region) + account_id = sts.get_caller_identity()["Account"] + + # Role name uses hash of region/bucket/prefix for uniqueness + role_hash = hashlib.md5( + f"{region}/{recording_bucket}/{recording_prefix}".encode() + ).hexdigest() + role_name = f"AgentCoreBrowserRecording-{role_hash}" + + # Policy name uses same hash + policy_name = f"S3RecordingAccess-{role_hash}" + + trust_policy = { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": {"Service": "bedrock-agentcore.amazonaws.com"}, + "Action": "sts:AssumeRole", + "Condition": { + "StringEquals": {"aws:SourceAccount": account_id}, + "ArnLike": { + "aws:SourceArn": f"arn:aws:bedrock-agentcore:{region}:{account_id}:*" + }, + }, + } + ], + } + + role_created = False + try: + iam.create_role( + RoleName=role_name, + AssumeRolePolicyDocument=json.dumps(trust_policy), + Description="Role for AgentCore browser recording", + ) + role_created = True + termcolor.cprint(f"Created IAM role: {role_name}", color="green") + except iam.exceptions.EntityAlreadyExistsException: + termcolor.cprint(f"Using existing IAM role: {role_name}", color="yellow") + + # Always ensure policy exists for this bucket/prefix + permissions_policy = { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "s3:PutObject", + 
"s3:ListMultipartUploadParts", + "s3:AbortMultipartUpload", + ], + "Resource": f"arn:aws:s3:::{recording_bucket}/{recording_prefix}/*", + "Condition": {"StringEquals": {"aws:ResourceAccount": account_id}}, + } + ], + } + + iam.put_role_policy( + RoleName=role_name, + PolicyName=policy_name, + PolicyDocument=json.dumps(permissions_policy), + ) + + if role_created: + # Wait for IAM propagation + termcolor.cprint("Waiting for IAM role to propagate...", color="yellow") + time.sleep(10) + + return f"arn:aws:iam::{account_id}:role/{role_name}" + + +def get_or_create_browser( + control_client, + browser_name: str, + execution_role_arn: str, + region: str, + recording_bucket: str = None, + recording_prefix: str = "recordings", +) -> str: + """Get existing browser or create new one with recording configured. + + Args: + control_client: boto3 bedrock-agentcore-control client + browser_name: Name for the browser resource + execution_role_arn: IAM role ARN for browser execution + region: AWS region for error messages and debugging + recording_bucket: Optional S3 bucket for session recording + recording_prefix: S3 prefix for recordings + + Returns: + Browser ID (e.g., "br-xxxxx") + """ + browser_id = None + + # Check for existing browser with same name + try: + next_token = None + while True: + list_params = {"maxResults": 100, "type": "CUSTOM"} + if next_token: + list_params["nextToken"] = next_token + + response = control_client.list_browsers(**list_params) + browser_summaries = response.get("browserSummaries", []) + + for browser in browser_summaries: + if browser.get("name") == browser_name: + status = browser.get("status", "") + browser_id = browser.get("browserId") + + if status in ["DELETING", "DELETE_FAILED"]: + browser_id = None + continue + + termcolor.cprint(f"Found existing browser {browser_id}", color="cyan") + break + + if browser_id or "nextToken" not in response: + break + + next_token = response["nextToken"] + except Exception as e: + termcolor.cprint(f"Error 
checking existing browsers: {e}", color="yellow") + + if not browser_id: + try: + create_params = { + "name": browser_name, + "networkConfiguration": {"networkMode": "PUBLIC"}, + "executionRoleArn": execution_role_arn, + "browserSigning": {"enabled": True}, + } + + if recording_bucket: + create_params["recording"] = { + "enabled": True, + "s3Location": { + "bucket": recording_bucket, + "prefix": recording_prefix.rstrip("/"), + }, + } + + response = control_client.create_browser(**create_params) + browser_id = response["browserId"] + termcolor.cprint(f"Created browser {browser_id}", color="green") + except control_client.exceptions.ConflictException: + raise ValueError( + f"Browser '{browser_name}' already exists in region '{region}'.\n" + f"This browser was likely created in a previous run but couldn't be found in list_browsers.\n\n" + f"To resolve:\n" + f" 1. Delete the existing browser via AWS Console or CLI:\n" + f" aws bedrock-agentcore-control delete-browser --browser-id --region {region}\n" + f" 2. Or use a different browser name by changing your recording configuration" + ) + + return browser_id + + +def setup_browser_recording( + browser_name: str, + browser_identifier: str, + recording_bucket: str, + recording_prefix: str, + execution_role_arn: str | None, + create_execution_role: bool, + region: str, +) -> tuple[str, str]: + """Set up browser recording configuration. + + Ensures execution role exists and browser is created with recording enabled. 
+ + Args: + browser_name: Name for the browser instance (must match [a-zA-Z][a-zA-Z0-9_]{0,47}) + browser_identifier: Browser identifier for sessions (e.g., "aws.browser.v1") + recording_bucket: S3 bucket name for recordings + recording_prefix: S3 prefix for recordings + execution_role_arn: IAM role ARN (or None to create) + create_execution_role: Whether to auto-create role if not provided + region: AWS region + + Returns: + Tuple of (execution_role_arn, browser_id) - The browser_id should be used as the identifier when starting sessions + + Raises: + ValueError: If execution_role_arn is None and create_execution_role is False + """ + import boto3 + + if not execution_role_arn and not create_execution_role: + raise ValueError( + "execution_role_arn is required when using recording. " + "Pass --execution_role_arn or use --create_execution_role to auto-create." + ) + + # Auto-create role if requested + if create_execution_role and not execution_role_arn: + execution_role_arn = create_recording_role( + browser_identifier, recording_bucket, recording_prefix, region + ) + + # If browser_identifier is already a browser ID, reuse it + if browser_identifier.startswith("br-"): + termcolor.cprint(f"Using browser ID: {browser_identifier}", color="cyan") + return execution_role_arn, browser_identifier + + control_client = boto3.client("bedrock-agentcore-control", region_name=region) + browser_id = get_or_create_browser( + control_client, + browser_name, + execution_role_arn, + region, + recording_bucket, + recording_prefix, + ) + + return execution_role_arn, browser_id diff --git a/main.py b/main.py index 05d5537..8d7485a 100644 --- a/main.py +++ b/main.py @@ -15,7 +15,7 @@ import os from agent import BrowserAgent -from computers import BrowserbaseComputer, PlaywrightComputer +from computers import BrowserbaseComputer, PlaywrightComputer, AgentCoreComputer PLAYWRIGHT_SCREEN_SIZE = (1440, 900) @@ -33,7 +33,7 @@ def main() -> int: parser.add_argument( "--env", type=str, - 
choices=("playwright", "browserbase"), + choices=("playwright", "browserbase", "agentcore"), default="playwright", help="The computer use environment to use.", ) @@ -49,6 +49,36 @@ def main() -> int: default=False, help="If possible, highlight the location of the mouse.", ) + parser.add_argument( + "--recording_bucket", + type=str, + default=None, + help="S3 bucket for AgentCore session recording (agentcore only).", + ) + parser.add_argument( + "--recording_prefix", + type=str, + default="recordings", + help="S3 prefix for AgentCore session recording (agentcore only).", + ) + parser.add_argument( + "--execution_role_arn", + type=str, + default=None, + help="IAM execution role ARN for AgentCore browser (required when using recording).", + ) + parser.add_argument( + "--create_execution_role", + action="store_true", + default=False, + help="Auto-create IAM execution role if it doesn't exist (agentcore only).", + ) + parser.add_argument( + "--browser_identifier", + type=str, + default=None, + help="Browser identifier for AgentCore (agentcore only). 
@patch('main.argparse.ArgumentParser')
@patch('main.AgentCoreComputer')
@patch('main.BrowserAgent')
def test_main_agentcore(self, mock_browser_agent, mock_agentcore_computer, mock_arg_parser):
    """main() forwards every AgentCore CLI option to AgentCoreComputer and runs the agent loop."""
    # Options that main() is expected to pass through unchanged.
    forwarded_options = {
        'initial_url': 'test_url',
        'recording_bucket': 'test-bucket',
        'recording_prefix': 'test-prefix',
        'execution_role_arn': 'test-role-arn',
        'create_execution_role': False,
        'browser_identifier': 'test-browser',
    }

    parsed_args = MagicMock()
    parsed_args.env = 'agentcore'
    parsed_args.query = 'test_query'
    parsed_args.model = 'test_model'
    for option_name, option_value in forwarded_options.items():
        setattr(parsed_args, option_name, option_value)
    mock_arg_parser.return_value.parse_args.return_value = parsed_args

    main.main()

    mock_agentcore_computer.assert_called_once_with(
        screen_size=main.PLAYWRIGHT_SCREEN_SIZE,
        **forwarded_options,
    )
    mock_browser_agent.assert_called_once()
    agent_instance = mock_browser_agent.return_value
    agent_instance.agent_loop.assert_called_once()