diff --git a/Cargo.lock b/Cargo.lock index 7e0f8ed..8064e70 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -50,7 +50,7 @@ dependencies = [ "percent-encoding", "pin-project-lite", "rand 0.10.1", - "sha1 0.11.0", + "sha1", "smallvec", "tokio", "tokio-util", @@ -571,37 +571,6 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" -[[package]] -name = "aws-config" -version = "1.8.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "517aa062d8bd9015ee23d6daa5e1c1372328412fdae4e6c4c1be9b69c6ad37a2" -dependencies = [ - "aws-credential-types", - "aws-runtime", - "aws-sdk-sso", - "aws-sdk-ssooidc", - "aws-sdk-sts", - "aws-smithy-async", - "aws-smithy-http", - "aws-smithy-json", - "aws-smithy-runtime", - "aws-smithy-runtime-api", - "aws-smithy-schema", - "aws-smithy-types", - "aws-types", - "bytes", - "fastrand", - "hex", - "http 1.4.0", - "sha1 0.10.6", - "time", - "tokio", - "tracing", - "url", - "zeroize", -] - [[package]] name = "aws-credential-types" version = "1.2.14" @@ -614,28 +583,6 @@ dependencies = [ "zeroize", ] -[[package]] -name = "aws-lc-rs" -version = "1.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ec2f1fc3ec205783a5da9a7e6c1509cc69dedf09a1949e412c1e18469326d00" -dependencies = [ - "aws-lc-sys", - "zeroize", -] - -[[package]] -name = "aws-lc-sys" -version = "0.41.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a2f9779ce85b93ab6170dd940ad0169b5766ff848247aff13bb788b832fe3f4" -dependencies = [ - "cc", - "cmake", - "dunce", - "fs_extra", -] - [[package]] name = "aws-runtime" version = "1.7.4" @@ -662,32 +609,6 @@ dependencies = [ "uuid", ] -[[package]] -name = "aws-sdk-bedrockagentcore" -version = "1.45.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b49b793a055a6a745d80328da58e83d14675a5d0b9af32ed491b8b6cc9165f2" -dependencies = [ - "arc-swap", - "aws-credential-types", - "aws-runtime", - "aws-smithy-async", - "aws-smithy-eventstream", - "aws-smithy-http", - "aws-smithy-json", - "aws-smithy-observability", - "aws-smithy-runtime", - "aws-smithy-runtime-api", - "aws-smithy-types", - "aws-types", - "bytes", - "fastrand", - "http 0.2.12", - "http 1.4.0", - "regex-lite", - "tracing", -] - [[package]] name = "aws-sdk-bedrockruntime" version = "1.131.0" @@ -715,80 +636,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "aws-sdk-sso" -version = "1.100.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bee2719d4a5e5e147bb9e9b77490df6ece750df1094968aa857b09b618a1881a" -dependencies = [ - "aws-credential-types", - "aws-runtime", - "aws-smithy-async", - "aws-smithy-http", - "aws-smithy-json", - "aws-smithy-observability", - "aws-smithy-runtime", - "aws-smithy-runtime-api", - "aws-smithy-types", - "aws-types", - "bytes", - "fastrand", - "http 0.2.12", - "http 1.4.0", - "regex-lite", - "tracing", -] - -[[package]] -name = "aws-sdk-ssooidc" -version = "1.102.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b30d254992d56ef19f430396e5765b11e0f5bd21a7a557cb12fca1c8c18b9636" -dependencies = [ - "arc-swap", - "aws-credential-types", - "aws-runtime", - "aws-smithy-async", - "aws-smithy-http", - "aws-smithy-json", - "aws-smithy-observability", - "aws-smithy-runtime", - "aws-smithy-runtime-api", - "aws-smithy-types", - "aws-types", - "bytes", - "fastrand", - "http 0.2.12", - "http 1.4.0", - "regex-lite", - "tracing", -] - -[[package]] -name = "aws-sdk-sts" -version = "1.105.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59f4f8065fe615dbed9096458ba98dda6d641553ffd5aedd27e37e65211aca9f" -dependencies = [ - "aws-credential-types", - "aws-runtime", - "aws-smithy-async", - "aws-smithy-http", - "aws-smithy-json", - "aws-smithy-observability", - "aws-smithy-query", - "aws-smithy-runtime", - "aws-smithy-runtime-api", - "aws-smithy-types", - "aws-smithy-xml", - "aws-types", - "fastrand", - "http 0.2.12", - "http 1.4.0", - "regex-lite", - "tracing", -] - [[package]] name = "aws-sigv4" version = "1.4.4" @@ -856,36 +703,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "aws-smithy-http-client" -version = "1.1.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a2f165a7feee6f263028b899d0a181987f4fa7179a6411a32a439fba7c5f769" -dependencies = [ - "aws-smithy-async", - "aws-smithy-runtime-api", - "aws-smithy-types", - "h2 0.3.27", - "h2 0.4.13", - "http 0.2.12", - "http 1.4.0", - "http-body 0.4.6", - "hyper 0.14.32", - "hyper 1.8.1", - "hyper-rustls 0.24.2", - "hyper-rustls 0.27.7", - "hyper-util", - "pin-project-lite", - "rustls 0.21.12", - "rustls 0.23.37", - "rustls-native-certs", - "rustls-pki-types", - "tokio", - "tokio-rustls 0.26.4", - "tower", - "tracing", -] - [[package]] name = "aws-smithy-json" version = "0.62.6" @@ -906,16 +723,6 @@ dependencies = [ "aws-smithy-runtime-api", ] -[[package]] -name = "aws-smithy-query" -version = "0.60.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a56d79744fb3edb5d722ef79d86081e121d3b9422cb209eb03aea6aa4f21ebd" -dependencies = [ - "aws-smithy-types", - "urlencoding", -] - [[package]] name = "aws-smithy-runtime" version = "1.11.3" @@ -924,7 +731,6 @@ checksum = "b8e6f5caf6fea86f8c2206541ab5857cfcda9013426cdbe8fa0098b9e2d32182" dependencies = [ "aws-smithy-async", "aws-smithy-http", - "aws-smithy-http-client", "aws-smithy-observability", "aws-smithy-runtime-api", "aws-smithy-schema", @@ -991,7 +797,6 @@ dependencies = [ "base64-simd", "bytes", "bytes-utils", - "futures-core", "http 0.2.12", "http 1.4.0", "http-body 0.4.6", @@ -1004,17 +809,6 @@ dependencies = [ "ryu", "serde", "time", - "tokio", - "tokio-util", -] - -[[package]] -name = "aws-smithy-xml" -version = "0.60.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ce02add1aa3677d022f8adf81dcbe3046a95f17a1b1e8979c145cd21d3d22b3" -dependencies = [ - "xmlparser", ] [[package]] @@ -1651,15 +1445,6 @@ dependencies = [ "error-code", ] -[[package]] -name = "cmake" -version = "0.1.58" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0f78a02292a74a88ac736019ab962ece0bc380e3f977bf72e376c5d78ff0678" -dependencies = [ - "cc", -] - [[package]] name = "cmov" version = "0.5.3" @@ -1756,6 +1541,18 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "cost" +version = "0.1.0" +dependencies = [ + "anyhow", + "reqwest", + "serde", + "serde_json 1.0.149", + "tempfile", + "tokio", +] + [[package]] name = "cpubits" version = "0.1.0" @@ -2155,12 +1952,6 @@ dependencies = [ "syn", ] -[[package]] -name = "dunce" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" - [[package]] name = "dyn-clone" version = "1.0.20" @@ -2347,6 +2138,7 @@ dependencies = [ "braintrust-llm-router", "braintrust-sdk-rust", "bytes", + "cost", "exoharness", "futures", "lingua", @@ -2367,6 +2159,7 @@ dependencies = [ "anyhow", "bytes", "clap", + "cost", "executor", "exoharness", "futures", @@ -2402,9 +2195,6 @@ dependencies = [ "aes-gcm 0.11.0-rc.3", "anyhow", "async-trait", - "aws-config", - "aws-credential-types", - "aws-sdk-bedrockagentcore", "base64", "bytes", "chrono", @@ -2541,12 +2331,6 @@ dependencies = [ "percent-encoding", ] -[[package]] -name = "fs_extra" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" - [[package]] name = "futures" version = "0.3.31" @@ -2972,30 +2756,6 @@ dependencies = [ "typenum", ] -[[package]] -name = "hyper" -version = "0.14.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" -dependencies = [ - "bytes", - "futures-channel", - "futures-core", - "futures-util", - "h2 0.3.27", - "http 0.2.12", - "http-body 0.4.6", - "httparse", - "httpdate", - "itoa", - "pin-project-lite", - "socket2 0.5.10", - "tokio", - "tower-service", - "tracing", - "want", -] - [[package]] name = "hyper" version = "1.8.1" @@ -3019,21 +2779,6 @@ dependencies = [ "want", ] -[[package]] -name = "hyper-rustls" -version = "0.24.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" -dependencies = [ - "futures-util", - "http 0.2.12", - "hyper 0.14.32", - "log", - "rustls 0.21.12", - "tokio", - "tokio-rustls 0.24.1", -] - [[package]] name = "hyper-rustls" version = "0.27.7" @@ -3041,13 +2786,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" dependencies = [ "http 1.4.0", - "hyper 1.8.1", + "hyper", "hyper-util", - "rustls 0.23.37", + "rustls", "rustls-native-certs", "rustls-pki-types", "tokio", - "tokio-rustls 0.26.4", + "tokio-rustls", "tower-service", "webpki-roots", ] @@ -3064,7 +2809,7 @@ dependencies = [ "futures-util", "http 1.4.0", "http-body 1.0.1", - "hyper 1.8.1", + "hyper", "ipnet", "libc", "percent-encoding", @@ -4112,7 +3857,7 @@ dependencies = [ "http-body-util", "httparse", "humantime", - "hyper 1.8.1", + "hyper", "itertools 0.14.0", "md-5", "parking_lot 0.12.5", @@ -4616,7 +4361,7 @@ dependencies = [ "quinn-proto", "quinn-udp", "rustc-hash 2.1.2", - "rustls 0.23.37", + "rustls", "socket2 0.6.3", "thiserror 2.0.18", "tokio", @@ -4636,7 +4381,7 @@ dependencies = [ "rand 0.9.4", "ring", "rustc-hash 2.1.2", - "rustls 0.23.37", + "rustls", "rustls-pki-types", "slab", "thiserror 2.0.18", @@ -4872,8 +4617,8 @@ dependencies = [ "http 1.4.0", "http-body 1.0.1", "http-body-util", - "hyper 1.8.1", - "hyper-rustls 0.27.7", + "hyper", + "hyper-rustls", "hyper-util", "js-sys", "log", @@ -4881,7 +4626,7 @@ dependencies = [ "percent-encoding", "pin-project-lite", "quinn", - "rustls 0.23.37", + "rustls", "rustls-native-certs", "rustls-pki-types", "serde", @@ -4889,7 +4634,7 @@ dependencies = [ "serde_urlencoded", "sync_wrapper", "tokio", - "tokio-rustls 0.26.4", + "tokio-rustls", "tokio-util", "tower", "tower-http", @@ -4928,7 +4673,7 @@ dependencies = [ "futures", "getrandom 0.2.17", "http 1.4.0", - "hyper 1.8.1", + "hyper", "parking_lot 0.11.2", "reqwest", "reqwest-middleware", @@ -5093,29 +4838,16 @@ dependencies = [ "windows-sys 0.61.2", ] -[[package]] -name = "rustls" -version = "0.21.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e" -dependencies = [ - "log", - "ring", - "rustls-webpki 0.101.7", - "sct", -] - [[package]] name = "rustls" version = "0.23.37" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4" dependencies = [ - "aws-lc-rs", "once_cell", "ring", "rustls-pki-types", - "rustls-webpki 0.103.10", + "rustls-webpki", "subtle", "zeroize", ] @@ -5151,23 +4883,12 @@ dependencies = [ "zeroize", ] -[[package]] -name = "rustls-webpki" -version = "0.101.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" -dependencies = [ - "ring", - "untrusted", -] - [[package]] name = "rustls-webpki" version = "0.103.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df33b2b81ac578cabaf06b89b0631153a3f416b0a886e8a7a1707fb51abbd1ef" dependencies = [ - "aws-lc-rs", "ring", "rustls-pki-types", "untrusted", @@ -5261,16 +4982,6 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" -[[package]] -name = "sct" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" -dependencies = [ - "ring", - "untrusted", -] - [[package]] name = "sec1" version = "0.7.3" @@ -5466,17 +5177,6 @@ dependencies = [ "unsafe-libyaml", ] -[[package]] -name = "sha1" -version = "0.10.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" -dependencies = [ - "cfg-if", - "cpufeatures 0.2.17", - "digest 0.10.7", -] - [[package]] name = "sha1" version = "0.11.0" @@ -5916,23 +5616,13 @@ dependencies = [ "syn", ] -[[package]] -name = "tokio-rustls" -version = "0.24.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" -dependencies = [ - "rustls 0.21.12", - "tokio", -] - [[package]] name = "tokio-rustls" version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" dependencies = [ - "rustls 0.23.37", + "rustls", "tokio", ] @@ -6984,7 +6674,7 @@ dependencies = [ "futures", "http 1.4.0", "http-body-util", - "hyper 1.8.1", + "hyper", "hyper-util", "log", "once_cell", @@ -7101,12 +6791,6 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" -[[package]] -name = "xmlparser" -version = "0.13.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" - [[package]] name = "yoke" version = "0.7.5" diff --git a/Cargo.toml b/Cargo.toml index 65970d0..14a4447 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,7 @@ [workspace] members = [ "crates/cli", + "crates/cost", "crates/exoharness", "crates/executor", "examples/exoclaw/scheduler-runner", @@ -17,9 +18,6 @@ rust-version = "1.95" anyhow = "1.0.100" async-trait = "0.1.89" actix-web = "4.9.0" -aws-config = "1.8.17" -aws-credential-types = "1.2.14" -aws-sdk-bedrockagentcore = "1.45.0" braintrust-sdk-rust = { git = "https://github.com/braintrustdata/braintrust-sdk-rust", rev = "55dcefd989b6038edb5ef8dfc438384340d5f4ae" } bytes = "1.10.1" chrono = { version = "0.4.42", features = ["serde"] } diff --git a/crates/cli/Cargo.toml b/crates/cli/Cargo.toml index bb93753..639715f 100644 --- a/crates/cli/Cargo.toml +++ b/crates/cli/Cargo.toml @@ -5,10 +5,6 @@ edition.workspace = true license.workspace = true rust-version.workspace = true -[features] -default = [] -aws-agentcore = ["executor/aws-agentcore"] - [[bin]] name = "exo" path = "src/main.rs" @@ -16,6 +12,7 @@ path = "src/main.rs" [dependencies] anyhow.workspace = true clap.workspace = true +cost = { path = "../cost" } executor = { path = "../executor" } lingua.workspace = true rustyline = "15.0.0" diff --git a/crates/cli/src/main.rs b/crates/cli/src/main.rs index 1dd8424..51a6bef 100644 --- a/crates/cli/src/main.rs +++ b/crates/cli/src/main.rs @@ -32,9 +32,9 @@ use executor::{ LocalSandboxExoHarness, PutSecretRequest, RlmHarness, SANDBOX_MAIN_MOUNT_DIR, SandboxBackendChoice, SandboxProvider, SandboxProviderConfig, SandboxScope, Secret, SecretBackendChoice, ToolRequest, ToolRuntime, TypeScriptHarness, TypeScriptHarnessConfig, - Uuid7, VercelBackendSpec, default_aws_agentcore_image, default_daytona_image, - default_docker_image, default_vercel_image, effective_sandbox_scope, load_agent_config, - send_conversation_wakeup, serve_exoharness_http_listener_with_options, + Uuid7, VercelBackendSpec, default_daytona_image, default_docker_image, default_vercel_image, + effective_sandbox_scope, load_agent_config, send_conversation_wakeup, + serve_exoharness_http_listener_with_options, }; use lingua::Message; use lingua::universal::{AssistantContent, AssistantContentPart, ToolContentPart, UserContent}; @@ -88,6 +88,12 @@ struct Cli { value_parser = parse_env_var_name )] bearer_env: Option, + /// Path to a LiteLLM price JSON for cost tracking (overrides fetch/cache). + #[arg(long, global = true, env = "EXO_LITELLM_PRICES_PATH")] + pricing_path: Option, + /// URL to fetch the LiteLLM price JSON from (overrides the default source). + #[arg(long, global = true, env = "EXO_LITELLM_PRICES_URL")] + pricing_url: Option, #[command(subcommand)] command: Commands, } @@ -206,8 +212,6 @@ enum SecretBackendArg { enum SandboxProviderArg { Daytona, Vercel, - #[value(name = "aws-agentcore")] - AwsAgentCore, #[value(name = "apple-container")] AppleContainer, Docker, @@ -220,7 +224,6 @@ impl From for SandboxProvider { match value { SandboxProviderArg::Daytona => Self::Daytona, SandboxProviderArg::Vercel => Self::Vercel, - SandboxProviderArg::AwsAgentCore => Self::AwsAgentCore, SandboxProviderArg::AppleContainer => Self::AppleContainer, SandboxProviderArg::Docker => Self::Docker, SandboxProviderArg::LocalProcess => Self::LocalProcess, @@ -251,22 +254,9 @@ fn default_sandbox_backends() -> Vec { SandboxBackendChoice::LocalProcess, SandboxBackendChoice::Daytona(DaytonaBackendSpec::with_conventional_secrets()), SandboxBackendChoice::Vercel(VercelBackendSpec::with_conventional_secrets()), - SandboxBackendChoice::AwsAgentCore, ] } -fn agentcore_region_from_arn(runtime_arn: &str) -> Option { - let mut parts = runtime_arn.split(':'); - let arn = parts.next()?; - let _partition = parts.next()?; - let service = parts.next()?; - let region = parts.next()?; - if arn == "arn" && service == "bedrock-agentcore" && !region.is_empty() { - return Some(region.to_string()); - } - None -} - #[cfg(target_os = "macos")] fn default_secret_backend() -> SecretBackendArg { SecretBackendArg::AppleKeychain @@ -579,35 +569,28 @@ enum ProviderCommands { /// List configured sandbox provider bindings. List, /// Configure a sandbox provider (writes a Binding::Sandbox). - Configure(Box), -} - -#[derive(Debug, Args)] -struct ProviderConfigureArgs { - #[arg(long, value_enum)] - provider: SandboxProviderArg, - /// Binding name (default: the provider name). - #[arg(long)] - name: Option, - /// Secret (by name) holding the provider's API key/token. Required for Daytona and Vercel. - #[arg(long)] - secret: Option, - /// Region/target. Daytona: us | eu | experimental. - #[arg(long)] - region: Option, - #[arg(long)] - organization_id: Option, - #[arg(long)] - project_id: Option, - #[arg(long)] - api_url: Option, - #[arg(long = "runtime-arn")] - runtime_arn: Option, - #[arg(long)] - qualifier: Option, - /// Default base image for sandboxes that don't request one. - #[arg(long)] - default_image: Option, + Configure { + #[arg(long, value_enum)] + provider: SandboxProviderArg, + /// Binding name (default: the provider name). + #[arg(long)] + name: Option, + /// Secret (by name) holding the provider's API key/token. Required for remote providers. + #[arg(long)] + secret: Option, + /// Region/target. Daytona: us | eu | experimental. + #[arg(long)] + region: Option, + #[arg(long)] + organization_id: Option, + #[arg(long)] + project_id: Option, + #[arg(long)] + api_url: Option, + /// Default base image for sandboxes that don't request one. + #[arg(long)] + default_image: Option, + }, } #[derive(Debug, Clone, Default, Args)] @@ -748,6 +731,7 @@ async fn main() -> Result<()> { &cli.command, ) .await?; + let pricing = Arc::new(cost::load(cli.pricing_path.clone(), cli.pricing_url.clone()).await); let harness = instantiate_harness( &cli.root, &exo_config, @@ -755,6 +739,7 @@ async fn main() -> Result<()> { harness_kind, runtime_config.clone(), env_vars.clone(), + pricing, ) .await?; @@ -1746,21 +1731,17 @@ async fn main() -> Result<()> { } } } - ProviderCommands::Configure(args) => { - let ProviderConfigureArgs { - provider, - name, - secret, - region, - organization_id, - project_id, - api_url, - runtime_arn, - qualifier, - default_image, - } = *args; - let binding_name = - name.unwrap_or_else(|| SandboxProvider::from(provider).as_str().to_string()); + ProviderCommands::Configure { + provider, + name, + secret, + region, + organization_id, + project_id, + api_url, + default_image, + } => { + let binding_name = name.unwrap_or_else(|| format!("{provider:?}").to_lowercase()); let config = match provider { SandboxProviderArg::Daytona => { let secret = @@ -1796,27 +1777,6 @@ async fn main() -> Result<()> { default_image: default_image.unwrap_or_else(default_vercel_image), } } - SandboxProviderArg::AwsAgentCore => { - let runtime_arn = runtime_arn.ok_or_else(|| { - anyhow!("--runtime-arn is required for aws-agentcore") - })?; - let region = match region { - Some(region) => region, - None => agentcore_region_from_arn(&runtime_arn).ok_or_else(|| { - anyhow!( - "--region is required when the AgentCore runtime ARN does not include a region" - ) - })?, - }; - SandboxProviderConfig::AwsAgentCore { - runtime_arn, - region, - qualifier, - endpoint_url: api_url, - default_image: default_image - .unwrap_or_else(default_aws_agentcore_image), - } - } SandboxProviderArg::Docker => SandboxProviderConfig::Docker { default_image: default_image.unwrap_or_else(default_docker_image), }, @@ -2010,12 +1970,14 @@ async fn instantiate_harness( kind: HarnessKind, runtime_config: Option, env_vars: HashMap, + pricing: Arc, ) -> Result> { let harness: Arc = match kind { HarnessKind::Basic => Arc::new(BasicHarness::from_exoharness( exoharness, runtime_config, env_vars, + pricing, )), HarnessKind::Rlm => Arc::new(RlmHarness::from_exoharness( exoharness, diff --git a/crates/cli/src/tui.rs b/crates/cli/src/tui.rs index b8eef1e..78bcb18 100644 --- a/crates/cli/src/tui.rs +++ b/crates/cli/src/tui.rs @@ -928,6 +928,7 @@ mod tests { ), }], response_id: None, + usage: None, }); assert_eq!(rendered.len(), 1); diff --git a/crates/cost/Cargo.toml b/crates/cost/Cargo.toml new file mode 100644 index 0000000..7d978a2 --- /dev/null +++ b/crates/cost/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "cost" +edition.workspace = true +version.workspace = true +license.workspace = true +rust-version.workspace = true + +[dependencies] +anyhow.workspace = true +reqwest.workspace = true +serde.workspace = true +serde_json.workspace = true + +[dev-dependencies] +tempfile = "3.23.0" +tokio = { workspace = true } diff --git a/crates/cost/src/lib.rs b/crates/cost/src/lib.rs new file mode 100644 index 0000000..cfb9f71 --- /dev/null +++ b/crates/cost/src/lib.rs @@ -0,0 +1,5 @@ +mod loader; +mod table; + +pub use loader::load; +pub use table::{ModelEntry, PricingTable, TokenCounts}; diff --git a/crates/cost/src/loader.rs b/crates/cost/src/loader.rs new file mode 100644 index 0000000..8dcf0fa --- /dev/null +++ b/crates/cost/src/loader.rs @@ -0,0 +1,125 @@ +//! Loads the LiteLLM price database. Resolution: explicit path -> fresh cache +//! -> fetch (cached on success) -> stale cache -> empty. Never fails: any error +//! degrades to an empty table (cost stays unset, tokens still persist). + +use std::path::PathBuf; +use std::time::Duration; + +use crate::PricingTable; + +const DEFAULT_URL: &str = + "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"; +const CACHE_TTL: Duration = Duration::from_secs(24 * 60 * 60); +const FETCH_TIMEOUT: Duration = Duration::from_secs(5); + +/// Load the table once at startup. `path`/`url` are caller-supplied (CLI flags or +/// env), so this stays free of global config reads. +pub async fn load(path: Option, url: Option) -> PricingTable { + if let Some(path) = path { + return match std::fs::read_to_string(&path) { + Ok(body) => parse_or_empty(&body), + Err(err) => { + eprintln!( + "[exo] reading pricing path {}: {err}; cost unavailable", + path.display() + ); + PricingTable::empty() + } + }; + } + + let cache = cache_path(); + let cached = cache.as_ref().and_then(read_cache); + if let Some((body, true)) = &cached { + return parse_or_empty(body); + } + + let url = url.unwrap_or_else(|| DEFAULT_URL.to_string()); + match fetch(&url).await { + Ok(body) if PricingTable::from_json_str(&body).is_ok() => { + if let Some(path) = &cache { + write_cache(path, &body); + } + parse_or_empty(&body) + } + Ok(_) => { + eprintln!("[exo] fetched pricing was unparseable; cost unavailable"); + PricingTable::empty() + } + Err(err) => match cached { + Some((body, _)) => { + eprintln!("[exo] pricing fetch failed ({err}); using stale cache"); + parse_or_empty(&body) + } + None => { + eprintln!("[exo] pricing fetch failed ({err}); cost unavailable"); + PricingTable::empty() + } + }, + } +} + +/// A corrupt or truncated document degrades to an empty table rather than erroring. +fn parse_or_empty(body: &str) -> PricingTable { + PricingTable::from_json_str(body).unwrap_or_else(|_| { + eprintln!("[exo] cached pricing is unparseable; cost unavailable until it refreshes"); + PricingTable::empty() + }) +} + +async fn fetch(url: &str) -> anyhow::Result { + let client = reqwest::Client::builder().timeout(FETCH_TIMEOUT).build()?; + Ok(client + .get(url) + .send() + .await? + .error_for_status()? + .text() + .await?) +} + +/// Returns `(body, is_fresh)`; `is_fresh` is true only within `CACHE_TTL`. +fn read_cache(path: &PathBuf) -> Option<(String, bool)> { + let fresh = std::fs::metadata(path) + .and_then(|m| m.modified()) + .ok() + .and_then(|t| t.elapsed().ok()) + .is_some_and(|age| age < CACHE_TTL); + Some((std::fs::read_to_string(path).ok()?, fresh)) +} + +fn write_cache(path: &PathBuf, body: &str) { + if let Some(parent) = path.parent() { + let _ = std::fs::create_dir_all(parent); + } + let _ = std::fs::write(path, body); +} + +fn cache_path() -> Option { + let base = std::env::var_os("XDG_CACHE_HOME") + .map(PathBuf::from) + .or_else(|| std::env::var_os("HOME").map(|h| PathBuf::from(h).join(".cache")))?; + Some(base.join("exo").join("litellm_prices.json")) +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + const FIXTURE: &str = r#"{ "claude-sonnet-4-6": { "litellm_provider": "anthropic", "input_cost_per_token": 3e-06 } }"#; + + #[tokio::test] + async fn explicit_path_is_loaded() { + let dir = TempDir::new().unwrap(); + let path = dir.path().join("prices.json"); + std::fs::write(&path, FIXTURE).unwrap(); + let table = load(Some(path), None).await; + assert!(table.lookup("claude-sonnet-4-6").is_some()); + } + + #[test] + fn corrupt_body_degrades_to_empty() { + assert!(parse_or_empty("{ not json").is_empty()); + } +} diff --git a/crates/cost/src/table.rs b/crates/cost/src/table.rs new file mode 100644 index 0000000..6bda82c --- /dev/null +++ b/crates/cost/src/table.rs @@ -0,0 +1,243 @@ +//! Per-model price data and cost math, parsed from LiteLLM's pricing database. +//! Pure: no network, no globals. + +use std::collections::HashMap; + +use serde::Deserialize; + +#[derive(Debug, Clone, Deserialize, Default)] +pub struct ModelEntry { + #[serde(default)] + pub litellm_provider: Option, + #[serde(default)] + pub input_cost_per_token: Option, + #[serde(default)] + pub output_cost_per_token: Option, + #[serde(default)] + pub cache_read_input_token_cost: Option, + #[serde(default)] + pub cache_creation_input_token_cost: Option, +} + +#[derive(Debug, Clone, Copy, Default)] +pub struct TokenCounts { + pub prompt: Option, + pub completion: Option, + pub prompt_cached: Option, + pub prompt_cache_creation: Option, +} + +#[derive(Debug, Clone, Default)] +pub struct PricingTable { + entries: HashMap, +} + +impl PricingTable { + pub fn empty() -> Self { + Self::default() + } + + /// Parse a LiteLLM `model_prices_and_context_window.json` document. The + /// `sample_spec` doc entry and any entry without per-token rates are skipped. + pub fn from_json_str(s: &str) -> anyhow::Result { + let raw: HashMap = serde_json::from_str(s)?; + let entries = raw + .into_iter() + .filter(|(key, _)| key != "sample_spec") + .filter_map(|(key, value)| { + Some((key, serde_json::from_value::(value).ok()?)) + }) + .collect(); + Ok(Self { entries }) + } + + pub fn len(&self) -> usize { + self.entries.len() + } + + pub fn is_empty(&self) -> bool { + self.entries.is_empty() + } + + /// Exact match, else the longest entry key that is a prefix of `model` at a + /// token boundary (next char absent or `-`/`:`), so dated revisions resolve + /// (`claude-sonnet-4-6-20251022` -> `claude-sonnet-4-6`) without sliding + /// `gpt-4o-mini` onto a `gpt-4` entry when `gpt-4o` is missing. + pub fn lookup(&self, model: &str) -> Option<&ModelEntry> { + if let Some(entry) = self.entries.get(model) { + return Some(entry); + } + self.entries + .iter() + .filter(|(key, _)| { + model.starts_with(key.as_str()) + && matches!( + model.as_bytes().get(key.len()), + None | Some(b'-') | Some(b':') + ) + }) + .max_by_key(|(key, _)| key.len()) + .map(|(_, entry)| entry) + } + + /// USD cost for one call, or `None` if the model is unknown or unpriced. + pub fn compute_cost_usd(&self, model: &str, tokens: TokenCounts) -> Option { + let entry = self.lookup(model)?; + let input = entry.input_cost_per_token?; + let output = entry.output_cost_per_token.unwrap_or(0.0); + let cache_read = entry.cache_read_input_token_cost.unwrap_or(input); + let cache_write = entry.cache_creation_input_token_cost.unwrap_or(input); + + let prompt = tokens.prompt.unwrap_or(0).max(0) as f64; + let completion = tokens.completion.unwrap_or(0).max(0) as f64; + let cached = tokens.prompt_cached.unwrap_or(0).max(0) as f64; + let created = tokens.prompt_cache_creation.unwrap_or(0).max(0) as f64; + + // Anthropic-family `prompt_tokens` excludes cached (bill additively); + // everyone else includes it (subtract before billing fresh input). + let fresh = if is_additive(entry.litellm_provider.as_deref()) { + prompt + } else { + (prompt - cached).max(0.0) + }; + Some(fresh * input + cached * cache_read + created * cache_write + completion * output) + } +} + +fn is_additive(provider: Option<&str>) -> bool { + match provider { + Some(p) => { + p.starts_with("anthropic") || p.starts_with("vertex_ai-anthropic") || p == "azure_ai" + } + None => false, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + const FIXTURE: &str = r#"{ + "sample_spec": { "comment": "ignored" }, + "claude-sonnet-4-6": { + "litellm_provider": "anthropic", "input_cost_per_token": 3e-06, + "output_cost_per_token": 1.5e-05, "cache_read_input_token_cost": 3e-07, + "cache_creation_input_token_cost": 3.75e-06 + }, + "gpt-4o-mini": { + "litellm_provider": "openai", "input_cost_per_token": 1.5e-07, + "output_cost_per_token": 6e-07, "cache_read_input_token_cost": 7.5e-08 + }, + "gpt-4": { "litellm_provider": "openai", "input_cost_per_token": 3e-05, "output_cost_per_token": 6e-05 }, + "us.anthropic.claude-sonnet-4-6": { + "litellm_provider": "bedrock_converse", "input_cost_per_token": 3.3e-06, + "output_cost_per_token": 1.65e-05, "cache_read_input_token_cost": 3.3e-07, + "cache_creation_input_token_cost": 4.125e-06 + } + }"#; + + fn table() -> PricingTable { + PricingTable::from_json_str(FIXTURE).unwrap() + } + + fn approx(a: f64, b: f64) { + assert!( + (a - b).abs() / b.abs().max(1e-12) < 1e-9, + "expected {b}, got {a}" + ); + } + + fn counts(prompt: i64, completion: i64, cached: i64, created: i64) -> TokenCounts { + TokenCounts { + prompt: Some(prompt), + completion: Some(completion), + prompt_cached: Some(cached), + prompt_cache_creation: Some(created), + } + } + + #[test] + fn empty_table_is_none() { + assert!( + PricingTable::empty() + .compute_cost_usd("claude-sonnet-4-6", counts(100, 50, 0, 0)) + .is_none() + ); + } + + #[test] + fn skips_sample_spec() { + assert!(table().lookup("sample_spec").is_none()); + assert_eq!(table().len(), 4); + } + + #[test] + fn dated_revision_resolves_to_base() { + let table = table(); + let entry = table.lookup("claude-sonnet-4-6-20251022").unwrap(); + assert_eq!(entry.litellm_provider.as_deref(), Some("anthropic")); + } + + #[test] + fn prefix_match_respects_token_boundary() { + // `gpt-4o` is absent; lookup must NOT fall back to `gpt-4`. + assert!(table().lookup("gpt-4o").is_none()); + // `gpt-4-0613` is a boundary extension of `gpt-4` and should match it. + assert!(table().lookup("gpt-4-0613").is_some()); + } + + #[test] + fn anthropic_additive() { + // 500 fresh + 10k cache-read + 200 completion (prompt excludes cached). + approx( + table() + .compute_cost_usd("claude-sonnet-4-6", counts(500, 200, 10_000, 0)) + .unwrap(), + 0.0075, + ); + } + + #[test] + fn anthropic_cache_creation() { + approx( + table() + .compute_cost_usd("claude-sonnet-4-6", counts(0, 100, 0, 5_000)) + .unwrap(), + 0.02025, + ); + } + + #[test] + fn openai_inclusive_subtracts_cached() { + // prompt=2000 includes 500 cached -> 1500 fresh. + approx( + table() + .compute_cost_usd("gpt-4o-mini", counts(2_000, 1_000, 500, 0)) + .unwrap(), + 0.0008625, + ); + } + + #[test] + fn bedrock_is_inclusive_not_additive() { + // prompt=2000 includes 500 cached -> fresh 1500 @ 3.3e-6, cached 500 @ 3.3e-7, 1000 out @ 1.65e-5. + approx( + table() + .compute_cost_usd( + "us.anthropic.claude-sonnet-4-6", + counts(2_000, 1_000, 500, 0), + ) + .unwrap(), + 1_500.0 * 3.3e-6 + 500.0 * 3.3e-7 + 1_000.0 * 1.65e-5, + ); + } + + #[test] + fn unknown_model_is_none() { + assert!( + table() + .compute_cost_usd("acme-llm-9000", counts(100, 50, 0, 0)) + .is_none() + ); + } +} diff --git a/crates/executor/Cargo.toml b/crates/executor/Cargo.toml index 08fc948..0d8dfb6 100644 --- a/crates/executor/Cargo.toml +++ b/crates/executor/Cargo.toml @@ -5,10 +5,6 @@ version.workspace = true license.workspace = true rust-version.workspace = true -[features] -default = [] -aws-agentcore = ["exoharness/aws-agentcore"] - [dependencies] anyhow.workspace = true async-trait.workspace = true @@ -17,6 +13,7 @@ boa_engine = "0.20.0" braintrust-llm-router.workspace = true braintrust-sdk-rust.workspace = true bytes.workspace = true +cost = { path = "../cost" } exoharness = { path = "../exoharness", features = [ "apple-keychain", "basic-backend", diff --git a/crates/executor/src/basic.rs b/crates/executor/src/basic.rs index 29993a9..2412d2c 100644 --- a/crates/executor/src/basic.rs +++ b/crates/executor/src/basic.rs @@ -3,9 +3,10 @@ use std::sync::{Arc, RwLock}; use std::time::Instant; use async_trait::async_trait; +use cost::{PricingTable, TokenCounts}; use exoharness::{ AgentHandle, ConversationHandle, ConversationId, EventData, EventId, EventKind, EventQuery, - EventQueryDirection, Result, ToolCallId, ToolRequest, TurnHandle, + EventQueryDirection, Result, ToolCallId, ToolRequest, TurnHandle, UsageRecord, }; use lingua::Message; use lingua::universal::{ToolContentPart, ToolResultContentPart}; @@ -24,14 +25,21 @@ pub struct BasicExecutor { model: Arc, tools: Arc, history_cache: Arc>>, + pricing: Arc, } impl BasicExecutor { pub fn new(model: Arc, tools: Arc) -> Self { + Self::with_pricing(model, tools, Arc::new(PricingTable::empty())) + } + + /// Cost is filled from `pricing`; an empty table leaves `cost_usd` unset. + pub fn with_pricing(model: Arc, tools: Arc, pricing: Arc) -> Self { Self { model, tools, history_cache: Arc::new(RwLock::new(HashMap::new())), + pricing, } } } @@ -42,6 +50,7 @@ impl Clone for BasicExecutor { model: Arc::clone(&self.model), tools: Arc::clone(&self.tools), history_cache: Arc::clone(&self.history_cache), + pricing: Arc::clone(&self.pricing), } } } @@ -133,7 +142,7 @@ where .complete_model_round(request, round as usize, stream_mode, turn_trace) .await?; - let events = interpret_model_response(response); + let events = interpret_model_response(response, &self.pricing); turn.add_events(events.clone()).await?; let tool_requests = collect_tool_requests(&events); @@ -170,9 +179,11 @@ where Some(turn_trace) => turn_trace.start_llm_round(&request, round).await, None => None, }; + let requested_model = request.model.clone(); match stream_mode { ExecutorStreamMode::Disabled => { + let started_at = Instant::now(); let response = match self.model.complete(request).await { Ok(response) => response, Err(error) => { @@ -182,6 +193,14 @@ where return Err(error); } }; + let duration = started_at.elapsed(); + let mut response = response; + if response.model.is_none() { + response.model = Some(requested_model); + } + if response.duration.is_none() { + response.duration = Some(duration); + } if let Some(llm_trace) = llm_trace { llm_trace.finish_success(&response, None).await; } @@ -236,6 +255,17 @@ where return Err(error); } }; + let duration = started_at.elapsed(); + let mut response = response; + if response.model.is_none() { + response.model = Some(requested_model); + } + if response.ttft.is_none() { + response.ttft = ttft; + } + if response.duration.is_none() { + response.duration = Some(duration); + } if let Some(llm_trace) = llm_trace { llm_trace.finish_success(&response, ttft).await; } @@ -447,13 +477,15 @@ fn remove_pending_tool_call(pending_tool_call_ids: &mut Vec, tool_ca } } -fn interpret_model_response(response: ModelResponse) -> Vec { +fn interpret_model_response(response: ModelResponse, pricing: &PricingTable) -> Vec { let mut events = Vec::new(); if !response.messages.is_empty() { + let usage = build_usage_record(&response, pricing); events.push(EventData::Messages { messages: response.messages, response_id: response.response_id, + usage, }); } @@ -468,6 +500,64 @@ fn interpret_model_response(response: ModelResponse) -> Vec { events } +fn build_usage_record( + response: &ModelResponse, + pricing: &PricingTable, +) -> Option> { + // Only emit a record when we have *something* worth recording — token usage + // or timing. Skipping when both are absent keeps event JSON clean for + // tests/fakes that don't populate metadata. + let has_usage = response.usage.is_some(); + let has_timing = response.ttft.is_some() || response.duration.is_some(); + if !has_usage && !has_timing { + return None; + } + + let model = response.model.clone().unwrap_or_default(); + let ( + prompt_tokens, + completion_tokens, + prompt_cached_tokens, + prompt_cache_creation_tokens, + completion_reasoning_tokens, + ) = match &response.usage { + Some(u) => ( + u.prompt_tokens, + u.completion_tokens, + u.prompt_cached_tokens, + u.prompt_cache_creation_tokens, + u.completion_reasoning_tokens, + ), + None => (None, None, None, None, None), + }; + + let cost_usd = if has_usage && !model.is_empty() { + pricing.compute_cost_usd( + &model, + TokenCounts { + prompt: prompt_tokens, + completion: completion_tokens, + prompt_cached: prompt_cached_tokens, + prompt_cache_creation: prompt_cache_creation_tokens, + }, + ) + } else { + None + }; + + Some(Box::new(UsageRecord { + model, + prompt_tokens, + completion_tokens, + prompt_cached_tokens, + prompt_cache_creation_tokens, + completion_reasoning_tokens, + cost_usd, + ttft_ms: response.ttft.map(|d| d.as_millis() as u64), + duration_ms: response.duration.map(|d| d.as_millis() as u64), + })) +} + #[derive(Debug, Clone)] struct ExecutableToolRequest { tool_call_id: String, diff --git a/crates/executor/src/basic_tests.rs b/crates/executor/src/basic_tests.rs index a7fd519..e88b219 100644 --- a/crates/executor/src/basic_tests.rs +++ b/crates/executor/src/basic_tests.rs @@ -48,6 +48,9 @@ async fn send_appends_user_and_assistant_messages() { messages: vec![assistant_message("pong")], tool_calls: vec![], usage: None, + model: None, + ttft: None, + duration: None, }])), Arc::new(FakeToolRuntime::default()), ); @@ -136,12 +139,18 @@ async fn send_executes_tool_round_trip() { }, }], usage: None, + model: None, + ttft: None, + duration: None, }, ModelResponse { response_id: Some(Uuid7::now()), messages: vec![assistant_message("done")], tool_calls: vec![], usage: None, + model: None, + ttft: None, + duration: None, }, ])); let executor = BasicExecutor::new( @@ -249,12 +258,18 @@ async fn send_records_tool_result_when_tool_execution_fails() { }, }], usage: None, + model: None, + ttft: None, + duration: None, }, ModelResponse { response_id: Some(Uuid7::now()), messages: vec![assistant_message("recovered")], tool_calls: vec![], usage: None, + model: None, + ttft: None, + duration: None, }, ])); let executor = BasicExecutor::new( @@ -357,6 +372,9 @@ async fn send_stream_emits_chunks_and_persists_final_response() { messages: vec![assistant_message("hello")], tool_calls: vec![], usage: None, + model: None, + ttft: None, + duration: None, }, }])), Arc::new(FakeToolRuntime::default()), @@ -686,18 +704,12 @@ impl AgentHandle for FakeAgentHandle { &self.record } - async fn list_conversations( - &self, - _request: exoharness::ListConversationsRequest, - ) -> Result>> { + async fn list_conversations(&self) -> Result>> { let state = self.state.lock().expect("state poisoned"); - Ok(exoharness::ListConversationsResult { - conversations: vec![Arc::new(FakeConversationHandle { - state: Arc::clone(&self.state), - record: state.conversation.record.clone(), - })], - next_cursor: None, - }) + Ok(vec![Arc::new(FakeConversationHandle { + state: Arc::clone(&self.state), + record: state.conversation.record.clone(), + })]) } async fn get_conversation( @@ -806,6 +818,7 @@ impl ConversationHandle for FakeConversationHandle { EventData::Messages { messages: request.input, response_id: None, + usage: None, }, )); } diff --git a/crates/executor/src/executor_types.rs b/crates/executor/src/executor_types.rs index 68292fc..1592a75 100644 --- a/crates/executor/src/executor_types.rs +++ b/crates/executor/src/executor_types.rs @@ -184,6 +184,15 @@ pub struct ModelResponse { pub messages: Vec, pub tool_calls: Vec, pub usage: Option, + /// Model identifier echoed back by the provider, if available. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub model: Option, + /// Time to first token (streaming path only). + #[serde(default, skip_serializing_if = "Option::is_none")] + pub ttft: Option, + /// Wall-clock duration from request start to end of response. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub duration: Option, } #[derive(Debug, Clone, Serialize, Deserialize)] diff --git a/crates/executor/src/harness_basic.rs b/crates/executor/src/harness_basic.rs index 51bccfb..e1f339b 100644 --- a/crates/executor/src/harness_basic.rs +++ b/crates/executor/src/harness_basic.rs @@ -2,6 +2,7 @@ use std::collections::HashMap; use std::sync::Arc; use crate::{BasicExecutor, BraintrustRuntimeConfig, ModelClient, ToolRuntime}; +use cost::PricingTable; use exoharness::{BasicExoHarness, BasicExoHarnessConfig, ExoHarness, Result}; use crate::harness_executor::ExecutorHarnessRuntime; @@ -24,6 +25,24 @@ impl BasicHarness { inner: SharedHarness::new(exoharness, runtime), } } + + /// Construct a harness whose executor fills cost from an explicit table. + pub fn with_pricing_table( + exoharness: Arc, + model: Arc, + tools: Arc, + pricing: Arc, + ) -> Self + where + M: ModelClient + 'static, + T: ToolRuntime + 'static, + { + let runtime = + ExecutorHarnessRuntime::new(BasicExecutor::with_pricing(model, tools, pricing), None); + Self { + inner: SharedHarness::new(exoharness, runtime), + } + } } impl BasicHarness { @@ -31,10 +50,14 @@ impl BasicHarness { exoharness: Arc, runtime_config: Option, env: HashMap, + pricing: Arc, ) -> Self { let model = Arc::new(RouterModelClient::new(env)); let tools = Arc::new(BasicToolRuntime); - let runtime = ExecutorHarnessRuntime::new(BasicExecutor::new(model, tools), runtime_config); + let runtime = ExecutorHarnessRuntime::new( + BasicExecutor::with_pricing(model, tools, pricing), + runtime_config, + ); Self { inner: SharedHarness::new(exoharness, runtime), } @@ -49,6 +72,7 @@ impl BasicHarness { Arc::new(BasicExoHarness::new(exo_config).await?), runtime_config, env, + Arc::new(PricingTable::empty()), )) } } diff --git a/crates/executor/src/harness_basic_tests.rs b/crates/executor/src/harness_basic_tests.rs index f1c3de1..9c732c4 100644 --- a/crates/executor/src/harness_basic_tests.rs +++ b/crates/executor/src/harness_basic_tests.rs @@ -12,7 +12,7 @@ use exoharness::{ ToolRequest, Uuid7, }; use lingua::universal::{AssistantContent, UserContent}; -use lingua::{Message, UniversalStreamChunk}; +use lingua::{Message, UniversalStreamChunk, UniversalUsage}; use serde_json::{Map, Value}; use tempfile::TempDir; @@ -113,6 +113,9 @@ async fn send_persists_messages_through_harness() { messages: vec![assistant_message("pong")], tool_calls: Vec::new(), usage: None, + model: None, + ttft: None, + duration: None, }])), Arc::new(BasicToolRuntime), ); @@ -172,6 +175,390 @@ async fn send_persists_messages_through_harness() { ); } +#[tokio::test(flavor = "current_thread")] +async fn usage_record_is_persisted_with_computed_cost() { + // Use an inline LiteLLM-schema fixture so the assertion is hermetic + // and doesn't depend on whatever rates the upstream JSON happens to + // ship today. + const PRICING_FIXTURE: &str = r#"{ + "claude-sonnet-4-6": { + "litellm_provider": "anthropic", + "mode": "chat", + "input_cost_per_token": 3e-06, + "output_cost_per_token": 1.5e-05, + "cache_read_input_token_cost": 3e-07, + "cache_creation_input_token_cost": 3.75e-06 + } + }"#; + let pricing = + Arc::new(cost::PricingTable::from_json_str(PRICING_FIXTURE).expect("fixture should parse")); + + let tempdir = TempDir::new().expect("tempdir should exist"); + let exoharness = Arc::new( + BasicExoHarness::new(local_test_config(tempdir.path().join("exoharness"))) + .await + .expect("basic exoharness should initialize"), + ) as Arc; + let harness = BasicHarness::with_pricing_table( + Arc::clone(&exoharness), + Arc::new(FakeModelClient::new(vec![ModelResponse { + response_id: Some(Uuid7::now()), + messages: vec![assistant_message("pong")], + tool_calls: Vec::new(), + usage: Some(UniversalUsage { + prompt_tokens: Some(1_000), + completion_tokens: Some(500), + prompt_cached_tokens: None, + prompt_cache_creation_tokens: None, + completion_reasoning_tokens: None, + }), + model: Some("claude-sonnet-4-6".to_string()), + ttft: None, + duration: None, + }])), + Arc::new(BasicToolRuntime), + pricing, + ); + + let secret_id = exoharness + .put_secret(PutSecretRequest { + name: "cost-test-key".to_string(), + secret: Secret::Key { + value: "test-key".to_string(), + }, + }) + .await + .expect("test secret should register"); + exoharness + .put_binding(Binding::Llm { + name: "claude-sonnet-4-6".to_string(), + model: "claude-sonnet-4-6".to_string(), + base_url: None, + secret_id: Some(secret_id), + }) + .await + .expect("binding should register"); + + let agent = harness + .create_agent(CreateAgentRequest { + slug: "cost-demo".to_string(), + name: None, + harness: crate::AgentHarnessKind::Basic, + typescript: None, + enable_agent_tool_creation: true, + sandbox_image: None, + sandbox_provider: SandboxProvider::LocalProcess, + enable_networking: false, + model: "claude-sonnet-4-6".to_string(), + max_output_tokens: None, + max_tool_round_trips: Some(2), + braintrust: None, + }) + .await + .expect("agent should be created"); + let conversation = agent + .create_conversation(CreateConversationRequest::default()) + .await + .expect("conversation should be created"); + + conversation + .send(SendRequest { + input: vec![user_message("ping")], + session_id: None, + }) + .await + .expect("send should succeed"); + + let events = conversation + .exoharness_handle() + .get_events(Some(EventQuery { + cursor: None, + direction: Some(EventQueryDirection::Asc), + limit: None, + session_id: None, + turn_id: None, + types: None, + })) + .await + .expect("get events should succeed") + .events; + + let assistant_usage = events + .iter() + .find_map(|event| match &event.data { + EventData::Messages { + messages, + usage: Some(usage), + .. + } if messages + .iter() + .any(|m| matches!(m, Message::Assistant { .. })) => + { + Some(usage) + } + _ => None, + }) + .expect("assistant message event should carry a UsageRecord"); + + assert_eq!(assistant_usage.model, "claude-sonnet-4-6"); + assert_eq!(assistant_usage.prompt_tokens, Some(1_000)); + assert_eq!(assistant_usage.completion_tokens, Some(500)); + // 1000 prompt @ $3/M + 500 completion @ $15/M = $0.003 + $0.0075 = $0.0105 + let cost = assistant_usage.cost_usd.expect("cost should be computed"); + assert!( + (cost - 0.0105).abs() < 1e-9, + "expected cost ~0.0105, got {cost}" + ); + // Non-streaming path measures total duration. + assert!( + assistant_usage.duration_ms.is_some(), + "duration should be recorded" + ); +} + +#[tokio::test(flavor = "current_thread")] +async fn usage_record_with_anthropic_cache_hits() { + // Anthropic accounting is additive: prompt_tokens is the fresh slice, + // and cache_read / cache_creation are billed separately on top. The + // pricing math is unit-tested in exoharness::pricing; this test is the + // end-to-end proof that cached counts (a) reach the persisted + // UsageRecord and (b) hit the discounted cache-read rate when + // compute_cost_usd is invoked through the executor. + const PRICING_FIXTURE: &str = r#"{ + "claude-sonnet-4-6": { + "litellm_provider": "anthropic", + "mode": "chat", + "input_cost_per_token": 3e-06, + "output_cost_per_token": 1.5e-05, + "cache_read_input_token_cost": 3e-07, + "cache_creation_input_token_cost": 3.75e-06 + } + }"#; + let pricing = + Arc::new(cost::PricingTable::from_json_str(PRICING_FIXTURE).expect("fixture should parse")); + + let tempdir = TempDir::new().expect("tempdir should exist"); + let exoharness = Arc::new( + BasicExoHarness::new(local_test_config(tempdir.path().join("exoharness"))) + .await + .expect("basic exoharness should initialize"), + ) as Arc; + let harness = BasicHarness::with_pricing_table( + Arc::clone(&exoharness), + Arc::new(FakeModelClient::new(vec![ModelResponse { + response_id: Some(Uuid7::now()), + messages: vec![assistant_message("pong")], + tool_calls: Vec::new(), + usage: Some(UniversalUsage { + prompt_tokens: Some(500), + completion_tokens: Some(200), + prompt_cached_tokens: Some(10_000), + prompt_cache_creation_tokens: Some(2_000), + completion_reasoning_tokens: None, + }), + model: Some("claude-sonnet-4-6".to_string()), + ttft: None, + duration: None, + }])), + Arc::new(BasicToolRuntime), + pricing, + ); + + let secret_id = exoharness + .put_secret(PutSecretRequest { + name: "anthropic-cache-key".to_string(), + secret: Secret::Key { + value: "test-key".to_string(), + }, + }) + .await + .expect("test secret should register"); + exoharness + .put_binding(Binding::Llm { + name: "claude-sonnet-4-6".to_string(), + model: "claude-sonnet-4-6".to_string(), + base_url: None, + secret_id: Some(secret_id), + }) + .await + .expect("binding should register"); + + let agent = harness + .create_agent(CreateAgentRequest { + slug: "anthropic-cache".to_string(), + name: None, + harness: crate::AgentHarnessKind::Basic, + typescript: None, + enable_agent_tool_creation: true, + sandbox_image: None, + sandbox_provider: SandboxProvider::LocalProcess, + enable_networking: false, + model: "claude-sonnet-4-6".to_string(), + max_output_tokens: None, + max_tool_round_trips: Some(2), + braintrust: None, + }) + .await + .expect("agent should be created"); + let conversation = agent + .create_conversation(CreateConversationRequest::default()) + .await + .expect("conversation should be created"); + + conversation + .send(SendRequest { + input: vec![user_message("ping")], + session_id: None, + }) + .await + .expect("send should succeed"); + + let usage = assistant_usage_record(&conversation).await; + + assert_eq!(usage.model, "claude-sonnet-4-6"); + assert_eq!(usage.prompt_tokens, Some(500)); + assert_eq!(usage.completion_tokens, Some(200)); + assert_eq!(usage.prompt_cached_tokens, Some(10_000)); + assert_eq!(usage.prompt_cache_creation_tokens, Some(2_000)); + // Anthropic-style additive: + // 500 fresh prompt @ $3/M = 0.0015 + // 10000 cache read @ $0.30/M = 0.003 + // 2000 cache creation @ $3.75/M = 0.0075 + // 200 completion @ $15/M = 0.003 + // total = 0.015 + let cost = usage.cost_usd.expect("cost should be computed"); + assert!( + (cost - 0.015).abs() < 1e-9, + "expected cost ~0.015, got {cost}" + ); +} + +#[tokio::test(flavor = "current_thread")] +async fn usage_record_with_openai_inclusive_accounting() { + // OpenAI accounting is inclusive: prompt_tokens is the *total* input + // including any cache hits, so the executor must subtract + // prompt_cached_tokens before billing the fresh-input rate. Getting + // this wrong silently double-bills cached tokens. This test pins the + // behavior at the conversation-log level — the same accounting that + // pricing.rs exercises in isolation now also has to survive the round + // trip through ModelResponse → UsageRecord → persisted event. + const PRICING_FIXTURE: &str = r#"{ + "gpt-4o-mini": { + "litellm_provider": "openai", + "mode": "chat", + "input_cost_per_token": 1.5e-07, + "output_cost_per_token": 6e-07, + "cache_read_input_token_cost": 7.5e-08 + } + }"#; + let pricing = + Arc::new(cost::PricingTable::from_json_str(PRICING_FIXTURE).expect("fixture should parse")); + + let tempdir = TempDir::new().expect("tempdir should exist"); + let exoharness = Arc::new( + BasicExoHarness::new(local_test_config(tempdir.path().join("exoharness"))) + .await + .expect("basic exoharness should initialize"), + ) as Arc; + let harness = BasicHarness::with_pricing_table( + Arc::clone(&exoharness), + Arc::new(FakeModelClient::new(vec![ModelResponse { + response_id: Some(Uuid7::now()), + messages: vec![assistant_message("pong")], + tool_calls: Vec::new(), + usage: Some(UniversalUsage { + // prompt_tokens here *includes* the 500 cached — OpenAI + // convention. + prompt_tokens: Some(2_000), + completion_tokens: Some(1_000), + prompt_cached_tokens: Some(500), + prompt_cache_creation_tokens: None, + completion_reasoning_tokens: None, + }), + model: Some("gpt-4o-mini".to_string()), + ttft: None, + duration: None, + }])), + Arc::new(BasicToolRuntime), + pricing, + ); + + let secret_id = exoharness + .put_secret(PutSecretRequest { + name: "openai-cache-key".to_string(), + secret: Secret::Key { + value: "test-key".to_string(), + }, + }) + .await + .expect("test secret should register"); + exoharness + .put_binding(Binding::Llm { + name: "gpt-4o-mini".to_string(), + model: "gpt-4o-mini".to_string(), + base_url: None, + secret_id: Some(secret_id), + }) + .await + .expect("binding should register"); + + let agent = harness + .create_agent(CreateAgentRequest { + slug: "openai-cache".to_string(), + name: None, + harness: crate::AgentHarnessKind::Basic, + typescript: None, + enable_agent_tool_creation: true, + sandbox_image: None, + sandbox_provider: SandboxProvider::LocalProcess, + enable_networking: false, + model: "gpt-4o-mini".to_string(), + max_output_tokens: None, + max_tool_round_trips: Some(2), + braintrust: None, + }) + .await + .expect("agent should be created"); + let conversation = agent + .create_conversation(CreateConversationRequest::default()) + .await + .expect("conversation should be created"); + + conversation + .send(SendRequest { + input: vec![user_message("ping")], + session_id: None, + }) + .await + .expect("send should succeed"); + + let usage = assistant_usage_record(&conversation).await; + + assert_eq!(usage.model, "gpt-4o-mini"); + // Raw counts are preserved as the provider reported them — the + // inclusive convention only matters for the cost computation, not for + // the stored tokens. + assert_eq!(usage.prompt_tokens, Some(2_000)); + assert_eq!(usage.completion_tokens, Some(1_000)); + assert_eq!(usage.prompt_cached_tokens, Some(500)); + // OpenAI-style inclusive: + // non_cached = 2000 - 500 = 1500 + // 1500 fresh prompt @ $0.15/M = 0.000225 + // 500 cache read @ $0.075/M = 0.0000375 + // 1000 completion @ $0.60/M = 0.0006 + // total = 0.0008625 + // If the executor mistakenly used the Anthropic-style additive + // formula here, it would bill all 2000 prompt tokens at the fresh + // rate and the total would be 0.0009375 — ~9% high — so this + // assertion catches the provider-classification bug the PR + // description calls out. + let cost = usage.cost_usd.expect("cost should be computed"); + assert!( + (cost - 0.0008625).abs() < 1e-9, + "expected cost ~0.0008625, got {cost}" + ); +} + #[tokio::test(flavor = "current_thread")] async fn close_session_appends_session_ended_event() { let tempdir = TempDir::new().expect("tempdir should exist"); @@ -187,6 +574,9 @@ async fn close_session_appends_session_ended_event() { messages: vec![assistant_message("pong")], tool_calls: Vec::new(), usage: None, + model: None, + ttft: None, + duration: None, }])), Arc::new(BasicToolRuntime), ); @@ -262,12 +652,18 @@ async fn updating_agent_config_refreshes_executor_cache() { messages: vec![assistant_message("pong-1")], tool_calls: Vec::new(), usage: None, + model: None, + ttft: None, + duration: None, }, ModelResponse { response_id: Some(Uuid7::now()), messages: vec![assistant_message("pong-2")], tool_calls: Vec::new(), usage: None, + model: None, + ttft: None, + duration: None, }, ])); let harness = BasicHarness::new(exoharness, Arc::clone(&model), Arc::new(BasicToolRuntime)); @@ -344,12 +740,18 @@ async fn send_executes_shell_tool_when_enabled() { }, }], usage: None, + model: None, + ttft: None, + duration: None, }, ModelResponse { response_id: Some(Uuid7::now()), messages: vec![assistant_message("done")], tool_calls: Vec::new(), usage: None, + model: None, + ttft: None, + duration: None, }, ])); let harness = BasicHarness::new(exoharness, Arc::clone(&model), Arc::new(BasicToolRuntime)); @@ -483,10 +885,9 @@ async fn harness_exposes_raw_exoharness_handles() { assert_eq!( agent .exoharness_handle() - .list_conversations(exoharness::ListConversationsRequest::default()) + .list_conversations() .await .expect("list conversations through agent handle") - .conversations .len(), 1 ); @@ -526,12 +927,18 @@ async fn updating_mounts_recreates_conversation_sandbox() { }, }], usage: None, + model: None, + ttft: None, + duration: None, }, ModelResponse { response_id: Some(Uuid7::now()), messages: vec![assistant_message("done-1")], tool_calls: Vec::new(), usage: None, + model: None, + ttft: None, + duration: None, }, ModelResponse { response_id: Some(Uuid7::now()), @@ -544,12 +951,18 @@ async fn updating_mounts_recreates_conversation_sandbox() { }, }], usage: None, + model: None, + ttft: None, + duration: None, }, ModelResponse { response_id: Some(Uuid7::now()), messages: vec![assistant_message("done-2")], tool_calls: Vec::new(), usage: None, + model: None, + ttft: None, + duration: None, }, ])); let harness = BasicHarness::new(exoharness, model, Arc::new(BasicToolRuntime)); @@ -762,18 +1175,27 @@ async fn conversation_model_override_changes_effective_model() { messages: vec![assistant_message("first")], tool_calls: Vec::new(), usage: None, + model: None, + ttft: None, + duration: None, }, ModelResponse { response_id: Some(Uuid7::now()), messages: vec![assistant_message("second")], tool_calls: Vec::new(), usage: None, + model: None, + ttft: None, + duration: None, }, ModelResponse { response_id: Some(Uuid7::now()), messages: vec![assistant_message("third")], tool_calls: Vec::new(), usage: None, + model: None, + ttft: None, + duration: None, }, ])); let harness = BasicHarness::new(exoharness, Arc::clone(&model), Arc::new(BasicToolRuntime)); @@ -925,6 +1347,45 @@ fn assistant_message(text: &str) -> Message { } } +/// Fetch the UsageRecord attached to the first Messages event that +/// contains an assistant message. Mirrors the pattern in +/// `usage_record_is_persisted_with_computed_cost` so the new tests stay +/// readable. +async fn assistant_usage_record( + conversation: &Arc, +) -> exoharness::UsageRecord { + let events = conversation + .exoharness_handle() + .get_events(Some(EventQuery { + cursor: None, + direction: Some(EventQueryDirection::Asc), + limit: None, + session_id: None, + turn_id: None, + types: None, + })) + .await + .expect("get events should succeed") + .events; + + events + .into_iter() + .find_map(|event| match event.data { + EventData::Messages { + messages, + usage: Some(usage), + .. + } if messages + .iter() + .any(|m| matches!(m, Message::Assistant { .. })) => + { + Some(*usage) + } + _ => None, + }) + .expect("assistant message event should carry a UsageRecord") +} + fn shell_command_arguments(command: &str) -> Map { Map::from_iter([(String::from("command"), Value::String(command.to_string()))]) } diff --git a/crates/executor/src/harness_facade.rs b/crates/executor/src/harness_facade.rs index 71ba9c2..3c18aab 100644 --- a/crates/executor/src/harness_facade.rs +++ b/crates/executor/src/harness_facade.rs @@ -196,11 +196,7 @@ where } async fn list_conversations(&self) -> Result> { - let conversations = self - .agent - .list_conversations(exoharness::ListConversationsRequest::default()) - .await? - .conversations; + let conversations = self.agent.list_conversations().await?; Ok(conversations .into_iter() .map(|conversation| conversation.record().clone()) diff --git a/crates/executor/src/harness_helpers.rs b/crates/executor/src/harness_helpers.rs index d08acb7..835cfec 100644 --- a/crates/executor/src/harness_helpers.rs +++ b/crates/executor/src/harness_helpers.rs @@ -68,10 +68,7 @@ pub(crate) async fn resolve_conversation_handle( return Ok(Some(conversation)); } - let conversations = agent - .list_conversations(exoharness::ListConversationsRequest::default()) - .await? - .conversations; + let conversations = agent.list_conversations().await?; Ok(conversations .into_iter() .find(|conversation| conversation.record().slug == conversation_ref)) diff --git a/crates/executor/src/harness_runtime.rs b/crates/executor/src/harness_runtime.rs index 2932294..0017fad 100644 --- a/crates/executor/src/harness_runtime.rs +++ b/crates/executor/src/harness_runtime.rs @@ -400,6 +400,9 @@ fn normalize_model_response(response: UniversalResponse) -> Result Result>> { - let result = self.remote.list_conversations(request).await?; - Ok(ListConversationsResult { - conversations: result - .conversations - .into_iter() - .map(|remote| wrap_conversation(Arc::clone(&self.state), remote)) - .collect(), - next_cursor: result.next_cursor, - }) + async fn list_conversations(&self) -> Result>> { + Ok(self + .remote + .list_conversations() + .await? + .into_iter() + .map(|remote| wrap_conversation(Arc::clone(&self.state), remote)) + .collect()) } async fn get_conversation( @@ -260,9 +254,8 @@ impl LocalSandboxConversation { let slug = format!("remote-{}", self.remote.record().id); let local_conversation = match local_agent - .list_conversations(ListConversationsRequest::default()) + .list_conversations() .await? - .conversations .into_iter() .find(|conversation| conversation.record().slug == slug) { diff --git a/crates/executor/src/rlm.rs b/crates/executor/src/rlm.rs index c8b19f5..6a0fd97 100644 --- a/crates/executor/src/rlm.rs +++ b/crates/executor/src/rlm.rs @@ -536,6 +536,7 @@ async fn append_final_answer( turn.add_events(vec![EventData::Messages { messages: vec![assistant_message(final_answer)], response_id, + usage: None, }]) .await?; append_custom_event( diff --git a/crates/executor/src/rlm_tests.rs b/crates/executor/src/rlm_tests.rs index bfddf63..952a828 100644 --- a/crates/executor/src/rlm_tests.rs +++ b/crates/executor/src/rlm_tests.rs @@ -46,12 +46,18 @@ async fn rlm_send_executes_repl_steps_and_persists_final_answer() { }, }], usage: None, + model: None, + ttft: None, + duration: None, }, ModelResponse { response_id: Some(Uuid7::now()), messages: vec![assistant_message("FINAL(done)")], tool_calls: Vec::new(), usage: None, + model: None, + ttft: None, + duration: None, }, ])); let harness = RlmHarness::new(exoharness, model); @@ -143,6 +149,9 @@ async fn rlm_subquery_variable_can_store_final_answer() { }, }], usage: None, + model: None, + ttft: None, + duration: None, }, ModelResponse { response_id: Some(Uuid7::now()), @@ -168,18 +177,27 @@ async fn rlm_subquery_variable_can_store_final_answer() { }, }], usage: None, + model: None, + ttft: None, + duration: None, }, ModelResponse { response_id: Some(Uuid7::now()), messages: vec![assistant_message("4")], tool_calls: Vec::new(), usage: None, + model: None, + ttft: None, + duration: None, }, ModelResponse { response_id: Some(Uuid7::now()), messages: vec![assistant_message("FINAL_VAR(final_answer)")], tool_calls: Vec::new(), usage: None, + model: None, + ttft: None, + duration: None, }, ])); let harness = RlmHarness::new(exoharness, Arc::clone(&model)); @@ -246,6 +264,9 @@ async fn rlm_send_stream_suppresses_internal_control_text() { messages: vec![assistant_message("FINAL(2)")], tool_calls: Vec::new(), usage: None, + model: None, + ttft: None, + duration: None, }, }])); let harness = RlmHarness::new(exoharness, model); @@ -315,6 +336,9 @@ async fn rlm_exposes_history_via_get_messages() { messages: vec![assistant_message("FINAL(recorded)")], tool_calls: Vec::new(), usage: None, + model: None, + ttft: None, + duration: None, }, ModelResponse { response_id: Some(Uuid7::now()), @@ -343,12 +367,18 @@ globalThis.answer = String(\n\ }, }], usage: None, + model: None, + ttft: None, + duration: None, }, ModelResponse { response_id: Some(Uuid7::now()), messages: vec![assistant_message("FINAL_VAR(answer)")], tool_calls: Vec::new(), usage: None, + model: None, + ttft: None, + duration: None, }, ])); let harness = RlmHarness::new(exoharness, model); @@ -421,6 +451,9 @@ async fn rlm_can_finish_by_setting_final_in_repl() { }, }], usage: None, + model: None, + ttft: None, + duration: None, }])); let harness = RlmHarness::new(exoharness, Arc::clone(&model)); register_test_model(harness.exoharness_handle().as_ref()).await; diff --git a/crates/exoharness/Cargo.toml b/crates/exoharness/Cargo.toml index 251462f..a935611 100644 --- a/crates/exoharness/Cargo.toml +++ b/crates/exoharness/Cargo.toml @@ -20,12 +20,6 @@ basic-backend = [ "dep:url", ] apple-keychain = ["dep:keyring", "dep:keyring-core"] -aws-agentcore = [ - "basic-backend", - "dep:aws-config", - "dep:aws-credential-types", - "dep:aws-sdk-bedrockagentcore", -] contract-tests = [] [dependencies] @@ -33,9 +27,6 @@ aes-gcm = { version = "0.11.0-rc.3", optional = true } actix-web = { workspace = true, optional = true } anyhow.workspace = true async-trait.workspace = true -aws-config = { workspace = true, optional = true } -aws-credential-types = { workspace = true, optional = true } -aws-sdk-bedrockagentcore = { workspace = true, optional = true } base64 = { version = "0.22.1", optional = true } bytes.workspace = true chrono.workspace = true diff --git a/crates/exoharness/src/basic.rs b/crates/exoharness/src/basic.rs index 3e38a9b..d33210f 100644 --- a/crates/exoharness/src/basic.rs +++ b/crates/exoharness/src/basic.rs @@ -36,14 +36,13 @@ use crate::{ BoxAsyncRead, BoxAsyncWrite, CancelSandboxProcessRequest, CloseSandboxProcessInputRequest, ConversationHandle, ConversationId, ConversationRecord, CreateSandboxRequest, Event, EventData, EventId, EventKind, EventQuery, EventQueryDirection, EventStream, ExoHarness, FileSystemMount, - ForkConversationRequest, GetEventsResult, GetSandboxProcessEventsResult, - ListConversationsRequest, ListConversationsResult, NewAgentRequest, NewConversationRequest, - PutSecretRequest, ReadArtifactRequest, Result, RunInSandboxRequest, SandboxId, SandboxProcess, - SandboxProcessEvent, SandboxProcessEventQuery, SandboxProcessId, SandboxProcessMode, - SandboxProcessParts, SandboxProcessRecord, SandboxProcessStatus, SandboxProcessStdin, - SandboxProvider, SandboxProviderConfig, Secret, SecretId, SecretMetadata, SecretType, - SessionId, SnapshotId, StartSandboxProcessRequest, StartSandboxRequest, TurnHandle, TurnId, - TurnRecord, Uuid7, WaitSandboxProcessRequest, WriteArtifactRequest, + ForkConversationRequest, GetEventsResult, GetSandboxProcessEventsResult, NewAgentRequest, + NewConversationRequest, PutSecretRequest, ReadArtifactRequest, Result, RunInSandboxRequest, + SandboxId, SandboxProcess, SandboxProcessEvent, SandboxProcessEventQuery, SandboxProcessId, + SandboxProcessMode, SandboxProcessParts, SandboxProcessRecord, SandboxProcessStatus, + SandboxProcessStdin, SandboxProvider, SandboxProviderConfig, Secret, SecretId, SecretMetadata, + SecretType, SessionId, SnapshotId, StartSandboxProcessRequest, StartSandboxRequest, TurnHandle, + TurnId, TurnRecord, Uuid7, WaitSandboxProcessRequest, WriteArtifactRequest, WriteSandboxProcessInputRequest, }; @@ -66,7 +65,6 @@ pub enum SandboxBackendChoice { LocalProcess, Daytona(DaytonaBackendSpec), Vercel(VercelBackendSpec), - AwsAgentCore, } impl SandboxBackendChoice { @@ -77,7 +75,6 @@ impl SandboxBackendChoice { Self::LocalProcess => SandboxProvider::LocalProcess, Self::Daytona(_) => SandboxProvider::Daytona, Self::Vercel(_) => SandboxProvider::Vercel, - Self::AwsAgentCore => SandboxProvider::AwsAgentCore, } } } @@ -208,23 +205,6 @@ impl BasicExoHarnessInner { }; Arc::new(crate::VercelSandboxBackend::new(config)?) } - SandboxBackendChoice::AwsAgentCore => { - #[cfg(feature = "aws-agentcore")] - { - let config = self.aws_agentcore_config_from_binding().await?.ok_or_else(|| { - anyhow!( - "aws-agentcore sandbox requested but no sandbox provider binding is configured; run `exo provider configure --provider aws-agentcore --runtime-arn `" - ) - })?; - Arc::new(crate::AwsAgentCoreSandboxBackend::new(config).await?) - } - #[cfg(not(feature = "aws-agentcore"))] - { - bail!( - "aws-agentcore sandbox backend requires building exoharness with the aws-agentcore feature" - ); - } - } }; Ok(backend) } @@ -418,38 +398,6 @@ impl BasicExoHarnessInner { })) } - #[cfg(feature = "aws-agentcore")] - async fn aws_agentcore_config_from_binding(&self) -> Result> { - let bindings = list_binding_records(&self.storage, Path::new("bindings")).await?; - let Some((runtime_arn, region, qualifier, endpoint_url)) = bindings - .into_iter() - .rev() - .find_map(|record| match record.binding { - Binding::Sandbox { - config: - SandboxProviderConfig::AwsAgentCore { - runtime_arn, - region, - qualifier, - endpoint_url, - .. - }, - .. - } => Some((runtime_arn, region, qualifier, endpoint_url)), - _ => None, - }) - else { - return Ok(None); - }; - Ok(Some(crate::AwsAgentCoreConfig { - runtime_arn, - region, - qualifier, - endpoint_url, - credentials: aws_agentcore_credentials_from_env(), - })) - } - /// The configured default base image for `provider`, from the newest /// `Binding::Sandbox` for it. `None` when no such binding exists, so the /// backend applies its own intrinsic default. @@ -464,25 +412,6 @@ impl BasicExoHarnessInner { } } -#[cfg(feature = "aws-agentcore")] -fn aws_agentcore_credentials_from_env() -> Option { - let access_key_id = std::env::var("AWS_AGENTCORE_ACCESS_KEY_ID") - .ok() - .filter(|value| !value.trim().is_empty())?; - let secret_access_key = std::env::var("AWS_AGENTCORE_SECRET_ACCESS_KEY") - .ok() - .filter(|value| !value.trim().is_empty())?; - let session_token = std::env::var("AWS_AGENTCORE_SESSION_TOKEN") - .ok() - .filter(|value| !value.trim().is_empty()); - - Some(crate::AwsAgentCoreCredentials { - access_key_id, - secret_access_key, - session_token, - }) -} - impl BasicExoHarness { pub async fn new(config: BasicExoHarnessConfig) -> Result { Self::new_with_backend(config, None).await @@ -716,23 +645,16 @@ impl AgentHandle for BasicAgentHandle { &self.record } - async fn list_conversations( - &self, - request: ListConversationsRequest, - ) -> Result>> { + async fn list_conversations(&self) -> Result>> { let mut handles: Vec> = Vec::new(); - let result = self.list_conversation_records(request).await?; - for record in result.conversations { + for record in self.list_conversation_records().await? { handles.push(Arc::new(BasicConversationHandle { harness: self.harness.clone(), agent_id: self.record.id, record, })); } - Ok(ListConversationsResult { - conversations: handles, - next_cursor: result.next_cursor, - }) + Ok(handles) } async fn get_conversation( @@ -764,10 +686,7 @@ impl AgentHandle for BasicAgentHandle { request: NewConversationRequest, ) -> Result> { let _guard = self.harness.inner.write_lock.lock().await; - let existing = self - .list_conversation_records(ListConversationsRequest::default()) - .await? - .conversations; + let existing = self.list_conversation_records().await?; let slug = match request.slug { Some(slug) => { if existing @@ -993,10 +912,7 @@ impl BasicAgentHandle { self.agent_dir().join("artifacts") } - async fn list_conversation_records( - &self, - request: ListConversationsRequest, - ) -> Result> { + async fn list_conversation_records(&self) -> Result> { let mut conversations = Vec::new(); for key in self .harness @@ -1016,48 +932,11 @@ impl BasicAgentHandle { .await?, ); } - conversations.sort_by_key(conversation_recency_key); - conversations.reverse(); - paginate_conversation_records(conversations, request) + conversations.sort_by_key(|record| record.id); + Ok(conversations) } } -fn conversation_recency_key(record: &ConversationRecord) -> Uuid7 { - record.latest_event_id.unwrap_or(record.id) -} - -fn paginate_conversation_records( - conversations: Vec, - request: ListConversationsRequest, -) -> Result> { - let start = match request.cursor { - Some(cursor) => conversations - .iter() - .position(|conversation| conversation_recency_key(conversation) == cursor) - .map(|index| index + 1) - .ok_or_else(|| anyhow!("conversation cursor not found: {cursor}"))?, - None => 0, - }; - let remaining = conversations.len().saturating_sub(start); - let Some(limit) = request.limit.filter(|limit| *limit > 0) else { - return Ok(ListConversationsResult { - conversations: conversations.into_iter().skip(start).collect(), - next_cursor: None, - }); - }; - let has_more = remaining > limit; - let page: Vec<_> = conversations.into_iter().skip(start).take(limit).collect(); - let next_cursor = if has_more { - page.last().map(conversation_recency_key) - } else { - None - }; - Ok(ListConversationsResult { - conversations: page, - next_cursor, - }) -} - struct BasicConversationHandle { harness: BasicExoHarness, agent_id: AgentId, @@ -1313,6 +1192,7 @@ impl ConversationHandle for BasicConversationHandle { events_to_append.push(EventData::Messages { messages: request.input, response_id: None, + usage: None, }); } @@ -1462,10 +1342,7 @@ impl ConversationHandle for BasicConversationHandle { .get_json::(self.agent_dir().join("record.json")) .await?, }; - let existing = agent - .list_conversation_records(ListConversationsRequest::default()) - .await? - .conversations; + let existing = agent.list_conversation_records().await?; let slug = match request.slug { Some(slug) => { if existing diff --git a/crates/exoharness/src/basic_tests.rs b/crates/exoharness/src/basic_tests.rs index 36cd91d..18b38c4 100644 --- a/crates/exoharness/src/basic_tests.rs +++ b/crates/exoharness/src/basic_tests.rs @@ -40,17 +40,6 @@ async fn basic_backend_supports_agent_and_conversation_crud() { crate::contract_tests::supports_agent_and_conversation_crud(harness).await; } -#[tokio::test(flavor = "current_thread")] -async fn basic_backend_lists_conversations_recent_first_and_paginates() { - let tempdir = TempDir::new().expect("tempdir"); - let harness: std::sync::Arc = std::sync::Arc::new( - BasicExoHarness::new(local_test_config(tempdir.path())) - .await - .expect("harness should initialize"), - ); - crate::contract_tests::list_conversations_returns_recent_first_and_paginates(harness).await; -} - #[tokio::test(flavor = "current_thread")] async fn basic_backend_contract_begin_turn_tracks_events_through_finish() { let tempdir = TempDir::new().expect("tempdir"); @@ -313,6 +302,7 @@ async fn turn_events_continue_after_artifact_writes() { turn.add_events(vec![EventData::Messages { messages: vec![assistant_message("pong")], response_id: None, + usage: None, }]) .await .expect("append after artifact write"); diff --git a/crates/exoharness/src/contract_tests.rs b/crates/exoharness/src/contract_tests.rs index a2f9783..897e97e 100644 --- a/crates/exoharness/src/contract_tests.rs +++ b/crates/exoharness/src/contract_tests.rs @@ -8,10 +8,9 @@ use lingua::universal::{AssistantContent, UserContent}; use tokio::time::timeout; use crate::{ - AddEventsRequest, BeginTurnRequest, Binding, EventData, EventKind, EventQuery, - EventQueryDirection, ExoHarness, ForkConversationRequest, ListConversationsRequest, - ManagedSandboxHandle, NewAgentRequest, NewConversationRequest, SandboxCommand, Uuid7, - WriteArtifactRequest, + BeginTurnRequest, Binding, EventData, EventKind, EventQuery, EventQueryDirection, ExoHarness, + ForkConversationRequest, ManagedSandboxHandle, NewAgentRequest, NewConversationRequest, + SandboxCommand, Uuid7, WriteArtifactRequest, }; pub async fn supports_agent_and_conversation_crud(harness: Arc) { @@ -52,10 +51,9 @@ pub async fn supports_agent_and_conversation_crud(harness: Arc) ); assert!( agent - .list_conversations(crate::ListConversationsRequest::default()) + .list_conversations() .await .expect("list conversations") - .conversations .iter() .any(|candidate| candidate.record().id == conversation.record().id) ); @@ -74,85 +72,6 @@ pub async fn supports_agent_and_conversation_crud(harness: Arc) ); } -pub async fn list_conversations_returns_recent_first_and_paginates(harness: Arc) { - let agent = harness - .new_agent(NewAgentRequest { - slug: unique_slug("agent"), - name: "Agent".to_string(), - }) - .await - .expect("agent should be created"); - let first = agent - .new_conversation(NewConversationRequest { - slug: Some(unique_slug("first")), - name: Some("First".to_string()), - }) - .await - .expect("first conversation"); - tokio::time::sleep(Duration::from_millis(2)).await; - let second = agent - .new_conversation(NewConversationRequest { - slug: Some(unique_slug("second")), - name: Some("Second".to_string()), - }) - .await - .expect("second conversation"); - tokio::time::sleep(Duration::from_millis(2)).await; - let third = agent - .new_conversation(NewConversationRequest { - slug: Some(unique_slug("third")), - name: Some("Third".to_string()), - }) - .await - .expect("third conversation"); - tokio::time::sleep(Duration::from_millis(2)).await; - first - .add_events(AddEventsRequest { - session_id: None, - turn_id: None, - expected_head: None, - data: vec![EventData::Custom { - event_type: "touch".to_string(), - payload: serde_json::Value::Null, - }], - }) - .await - .expect("touch first conversation"); - - let page = agent - .list_conversations(ListConversationsRequest { - cursor: None, - limit: Some(2), - }) - .await - .expect("first page"); - let page_ids: Vec<_> = page - .conversations - .iter() - .map(|conversation| conversation.record().id) - .collect(); - assert_eq!(page_ids, vec![first.record().id, third.record().id]); - assert_eq!( - page.next_cursor, - Some(third.record().latest_event_id.unwrap_or(third.record().id)) - ); - - let next_page = agent - .list_conversations(ListConversationsRequest { - cursor: page.next_cursor, - limit: Some(2), - }) - .await - .expect("second page"); - let next_page_ids: Vec<_> = next_page - .conversations - .iter() - .map(|conversation| conversation.record().id) - .collect(); - assert_eq!(next_page_ids, vec![second.record().id]); - assert_eq!(next_page.next_cursor, None); -} - pub async fn begin_turn_tracks_events_through_finish(harness: Arc) { let agent = harness .new_agent(NewAgentRequest { @@ -176,6 +95,7 @@ pub async fn begin_turn_tracks_events_through_finish(harness: Arc Result>> { + async fn list_conversations(&self) -> Result>> { match self .harness .request(Request::ListConversations { agent_id: self.record.id, - request, }) .await? { - Response::Conversations { result } => Ok(ListConversationsResult { - conversations: result - .conversations - .into_iter() - .map(|conversation| { - Arc::new(HttpConversationHandle::new( - self.harness.clone(), - conversation, - )) as _ - }) - .collect(), - next_cursor: result.next_cursor, - }), + Response::Conversations { conversations } => Ok(conversations + .into_iter() + .map(|conversation| { + Arc::new(HttpConversationHandle::new( + self.harness.clone(), + conversation, + )) as _ + }) + .collect()), response => unexpected_response(response, "conversations"), } } diff --git a/crates/exoharness/src/http_tests.rs b/crates/exoharness/src/http_tests.rs index 8312d8e..59dae77 100644 --- a/crates/exoharness/src/http_tests.rs +++ b/crates/exoharness/src/http_tests.rs @@ -48,15 +48,6 @@ async fn http_exoharness_supports_agent_and_conversation_crud() { crate::contract_tests::supports_agent_and_conversation_crud(Arc::clone(&fixture.harness)).await; } -#[actix_web::test] -async fn http_exoharness_lists_conversations_recent_first_and_paginates() { - let fixture = http_harness().await; - crate::contract_tests::list_conversations_returns_recent_first_and_paginates(Arc::clone( - &fixture.harness, - )) - .await; -} - #[actix_web::test] async fn http_exoharness_begin_turn_tracks_events_through_finish() { let fixture = http_harness().await; diff --git a/crates/exoharness/src/protocol.rs b/crates/exoharness/src/protocol.rs index ef53608..40f8bc7 100644 --- a/crates/exoharness/src/protocol.rs +++ b/crates/exoharness/src/protocol.rs @@ -5,11 +5,11 @@ use crate::{ BeginTurnRequest, Binding, BindingId, BindingRecord, CancelSandboxProcessRequest, CloseSandboxProcessInputRequest, ConversationId, ConversationRecord, CreateSandboxRequest, Event, EventData, EventId, EventQuery, ForkConversationRequest, GetEventsResult, - GetSandboxProcessEventsResult, ListConversationsRequest, ListConversationsResult, - NewAgentRequest, NewConversationRequest, PutSecretRequest, ReadArtifactRequest, SandboxId, - SandboxProcessEventQuery, SandboxProcessRecord, SandboxProcessStatus, Secret, SecretId, - SecretMetadata, SessionId, SnapshotId, StartSandboxProcessRequest, StartSandboxRequest, TurnId, - TurnRecord, WaitSandboxProcessRequest, WriteArtifactRequest, WriteSandboxProcessInputRequest, + GetSandboxProcessEventsResult, NewAgentRequest, NewConversationRequest, PutSecretRequest, + ReadArtifactRequest, SandboxId, SandboxProcessEventQuery, SandboxProcessRecord, + SandboxProcessStatus, Secret, SecretId, SecretMetadata, SessionId, SnapshotId, + StartSandboxProcessRequest, StartSandboxRequest, TurnId, TurnRecord, WaitSandboxProcessRequest, + WriteArtifactRequest, WriteSandboxProcessInputRequest, }; #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] @@ -70,7 +70,6 @@ pub enum Request { }, ListConversations { agent_id: AgentId, - request: ListConversationsRequest, }, GetConversation { agent_id: AgentId, @@ -343,7 +342,7 @@ pub enum Response { value: bool, }, Conversations { - result: ListConversationsResult, + conversations: Vec, }, Conversation { conversation: Option, diff --git a/crates/exoharness/src/sandbox_provider/aws_agentcore.rs b/crates/exoharness/src/sandbox_provider/aws_agentcore.rs deleted file mode 100644 index dfee119..0000000 --- a/crates/exoharness/src/sandbox_provider/aws_agentcore.rs +++ /dev/null @@ -1,421 +0,0 @@ -use std::sync::Arc; - -use anyhow::{Context, Result, bail}; -use async_trait::async_trait; -use aws_config::{BehaviorVersion, Region}; -use aws_credential_types::Credentials; -use aws_sdk_bedrockagentcore::Client; -use aws_sdk_bedrockagentcore::primitives::Blob; -use serde::{Deserialize, Serialize}; - -use crate::sandbox::{ - ManagedSandboxBackend, ManagedSandboxHandle, SandboxCommand, SandboxCommandOutput, - SandboxNetworkPolicy, SandboxRequest, SandboxSpec, SnapshotPayload, sandbox_spec_hash, -}; -use crate::sandbox_provider::process_bridge; - -pub fn default_aws_agentcore_image() -> String { - String::new() -} - -#[derive(Debug, Clone)] -pub struct AwsAgentCoreConfig { - pub runtime_arn: String, - pub region: String, - pub qualifier: Option, - pub endpoint_url: Option, - pub credentials: Option, -} - -#[derive(Debug, Clone)] -pub struct AwsAgentCoreCredentials { - pub access_key_id: String, - pub secret_access_key: String, - pub session_token: Option, -} - -pub struct AwsAgentCoreSandboxBackend { - client: Client, - runtime_arn: String, - invoke_target: AwsAgentCoreInvokeTarget, - qualifier: Option, -} - -impl AwsAgentCoreSandboxBackend { - pub async fn new(config: AwsAgentCoreConfig) -> Result { - if config.runtime_arn.trim().is_empty() { - bail!("AgentCore runtime ARN must not be empty"); - } - if config.region.trim().is_empty() { - bail!("AgentCore region must not be empty"); - } - let region = config.region; - let mut sdk_config_loader = - aws_config::defaults(BehaviorVersion::latest()).region(Region::new(region.clone())); - if let Some(credentials) = config.credentials { - sdk_config_loader = sdk_config_loader.credentials_provider(Credentials::new( - credentials.access_key_id, - credentials.secret_access_key, - credentials.session_token, - None, - "aws-agentcore", - )); - } - let sdk_config = sdk_config_loader.load().await; - let mut service_config_builder = - aws_sdk_bedrockagentcore::config::Builder::from(&sdk_config); - if let Some(endpoint_url) = config.endpoint_url { - service_config_builder = service_config_builder.endpoint_url(endpoint_url); - } else { - service_config_builder.set_endpoint_url(None); - } - let service_config = service_config_builder.build(); - let invoke_target = agentcore_invoke_target(&config.runtime_arn)?; - Ok(Self { - client: Client::from_conf(service_config), - runtime_arn: config.runtime_arn, - invoke_target, - qualifier: config.qualifier, - }) - } -} - -#[async_trait] -impl ManagedSandboxBackend for AwsAgentCoreSandboxBackend { - async fn acquire(&self, request: SandboxRequest) -> Result> { - reject_unsupported_request(&request)?; - let spec_hash = sandbox_spec_hash(&request.spec); - let runtime_session_id = agentcore_runtime_session_id(&request, &spec_hash); - Ok(Arc::new(AwsAgentCoreSandboxHandle { - id: format!("aws-agentcore:{runtime_session_id}"), - runtime_session_id, - request, - backend: AwsAgentCoreBackendHandle { - client: self.client.clone(), - runtime_arn: self.runtime_arn.clone(), - invoke_target: self.invoke_target.clone(), - qualifier: self.qualifier.clone(), - }, - })) - } - - async fn acquire_from_snapshot( - &self, - _request: SandboxRequest, - _payload: SnapshotPayload, - ) -> Result> { - bail!("restoring an AgentCore sandbox from a snapshot is not implemented yet"); - } -} - -#[derive(Clone)] -struct AwsAgentCoreBackendHandle { - client: Client, - runtime_arn: String, - invoke_target: AwsAgentCoreInvokeTarget, - qualifier: Option, -} - -#[derive(Clone)] -struct AwsAgentCoreInvokeTarget { - runtime_identifier: String, - account_id: String, -} - -struct AwsAgentCoreSandboxHandle { - id: String, - runtime_session_id: String, - request: SandboxRequest, - backend: AwsAgentCoreBackendHandle, -} - -#[async_trait] -impl ManagedSandboxHandle for AwsAgentCoreSandboxHandle { - fn id(&self) -> &str { - &self.id - } - - async fn exec(&self, command: &SandboxCommand) -> Result { - exec_in_agentcore( - &self.backend, - &self.runtime_session_id, - &self.request.spec, - command, - ) - .await - } - - async fn start_process(&self, command: &SandboxCommand) -> Result { - start_process_in_agentcore( - &self.backend, - &self.runtime_session_id, - &self.request.spec, - command, - ) - .await - } - - async fn stop(&self) -> Result<()> { - let mut request = self - .backend - .client - .stop_runtime_session() - .agent_runtime_arn(self.backend.runtime_arn.clone()) - .runtime_session_id(self.runtime_session_id.clone()); - if let Some(qualifier) = &self.backend.qualifier { - request = request.qualifier(qualifier.clone()); - } - request.send().await.with_context(|| { - format!( - "stopping AgentCore runtime session {}", - self.runtime_session_id - ) - })?; - Ok(()) - } - - async fn snapshot(&self) -> Result { - bail!("AgentCore sandbox snapshots are not implemented yet"); - } -} - -async fn exec_in_agentcore( - backend: &AwsAgentCoreBackendHandle, - runtime_session_id: &str, - spec: &SandboxSpec, - command: &SandboxCommand, -) -> Result { - if command.argv.is_empty() { - bail!("sandbox command requires at least one argv entry"); - } - let cwd = command - .cwd - .clone() - .unwrap_or_else(|| spec.default_workdir.clone()); - let body = AgentCoreExecRequest { - request_type: "exec", - argv: command.argv.clone(), - env: command.env.clone(), - cwd: cwd.clone(), - timeout_ms: command.timeout.map(duration_to_millis), - }; - let response: AgentCoreExecResponse = - invoke_agentcore_json(backend, runtime_session_id, &body).await?; - if let Some(error) = response.error { - bail!("AgentCore exec failed: {error}"); - } - let exit_code = response - .exit_code - .context("AgentCore exec response did not include exit_code")?; - let ok = response.ok.unwrap_or(exit_code == 0); - Ok(SandboxCommandOutput { - ok, - exit_code: Some(exit_code), - stdout: response.stdout.unwrap_or_default(), - stderr: response.stderr.unwrap_or_default(), - command: command.argv.clone(), - cwd: response.cwd.unwrap_or(cwd), - }) -} - -async fn start_process_in_agentcore( - backend: &AwsAgentCoreBackendHandle, - runtime_session_id: &str, - spec: &SandboxSpec, - command: &SandboxCommand, -) -> Result { - if command.argv.is_empty() { - bail!("sandbox command requires at least one argv entry"); - } - let cwd = command - .cwd - .clone() - .unwrap_or_else(|| spec.default_workdir.clone()); - let response: AgentCoreStartProcessResponse = invoke_agentcore_json( - backend, - runtime_session_id, - &AgentCoreStartProcessRequest { - request_type: "start_process", - argv: command.argv.clone(), - env: command.env.clone(), - cwd, - }, - ) - .await?; - let client = AwsAgentCoreProcessBridgeClient { - backend: backend.clone(), - runtime_session_id: runtime_session_id.to_string(), - process_id: response.process_id, - }; - Ok(process_bridge::process_parts(Arc::new(client))) -} - -struct AwsAgentCoreProcessBridgeClient { - backend: AwsAgentCoreBackendHandle, - runtime_session_id: String, - process_id: String, -} - -#[async_trait] -impl process_bridge::Client for AwsAgentCoreProcessBridgeClient { - async fn request(&self, request: process_bridge::Request) -> Result { - invoke_agentcore_json( - &self.backend, - &self.runtime_session_id, - &AgentCoreProcessBridgeRequest { - request_type: "process_bridge", - process_id: self.process_id.clone(), - request, - }, - ) - .await - } -} - -async fn invoke_agentcore_json( - backend: &AwsAgentCoreBackendHandle, - runtime_session_id: &str, - body: &Request, -) -> Result -where - Request: Serialize, - Response: for<'de> Deserialize<'de>, -{ - let payload = serde_json::to_vec(body).context("serializing AgentCore runtime request")?; - let mut request = backend - .client - .invoke_agent_runtime() - .agent_runtime_arn(backend.invoke_target.runtime_identifier.clone()) - .account_id(backend.invoke_target.account_id.clone()) - .runtime_session_id(runtime_session_id.to_string()) - .content_type("application/json") - .accept("application/json") - .payload(Blob::new(payload)); - if let Some(qualifier) = &backend.qualifier { - request = request.qualifier(qualifier.clone()); - } - let output = request.send().await.with_context(|| { - format!( - "invoking AgentCore runtime session {runtime_session_id} with runtime {} in account {}", - backend.invoke_target.runtime_identifier, backend.invoke_target.account_id, - ) - })?; - let status_code = output.status_code.unwrap_or(200); - let bytes = output - .response - .collect() - .await - .context("reading AgentCore runtime response body")? - .into_bytes(); - if !(200..300).contains(&status_code) { - let text = String::from_utf8_lossy(&bytes); - bail!("AgentCore runtime returned status {status_code}: {text}"); - } - serde_json::from_slice(&bytes).with_context(|| { - let text = String::from_utf8_lossy(&bytes); - format!("decoding AgentCore runtime JSON response: {text}") - }) -} - -fn agentcore_invoke_target(runtime_arn: &str) -> Result { - let mut parts = runtime_arn.splitn(6, ':'); - let arn = parts.next(); - let _partition = parts.next(); - let service = parts.next(); - let _region = parts.next(); - let account_id = parts.next(); - let resource = parts.next(); - let runtime_identifier = resource.and_then(|resource| resource.strip_prefix("runtime/")); - match (arn, service, account_id, runtime_identifier) { - (Some("arn"), Some("bedrock-agentcore"), Some(account_id), Some(runtime_identifier)) - if !account_id.is_empty() && !runtime_identifier.is_empty() => - { - Ok(AwsAgentCoreInvokeTarget { - runtime_identifier: runtime_identifier.to_string(), - account_id: account_id.to_string(), - }) - } - _ => bail!( - "AgentCore runtime ARN must have the form arn:...:bedrock-agentcore:...::runtime/" - ), - } -} - -fn reject_unsupported_request(request: &SandboxRequest) -> Result<()> { - if !request.spec.mounts.is_empty() { - bail!( - "AgentCore sandbox backend does not support host bind-mounts; remove conversation mounts or use a local sandbox provider" - ); - } - if matches!(request.spec.network, SandboxNetworkPolicy::Disabled) { - bail!("AgentCore sandbox backend cannot enforce disabled networking"); - } - Ok(()) -} - -fn agentcore_runtime_session_id(request: &SandboxRequest, spec_hash: &str) -> String { - let input = format!("{}\n{spec_hash}", request.key); - format!("exo-{}-session-0000000000", stable_fnv1a_hex(&input)) -} - -fn stable_fnv1a_hex(input: &str) -> String { - let mut hash = 0xcbf29ce484222325u64; - for byte in input.as_bytes() { - hash ^= u64::from(*byte); - hash = hash.wrapping_mul(0x100000001b3); - } - format!("{hash:016x}") -} - -fn duration_to_millis(duration: std::time::Duration) -> u64 { - duration.as_millis().min(u128::from(u64::MAX)) as u64 -} - -#[derive(Debug, Serialize)] -struct AgentCoreExecRequest { - #[serde(rename = "type")] - request_type: &'static str, - argv: Vec, - env: std::collections::HashMap, - cwd: String, - #[serde(skip_serializing_if = "Option::is_none")] - timeout_ms: Option, -} - -#[derive(Debug, Serialize)] -struct AgentCoreStartProcessRequest { - #[serde(rename = "type")] - request_type: &'static str, - argv: Vec, - env: std::collections::HashMap, - cwd: String, -} - -#[derive(Debug, Deserialize)] -struct AgentCoreStartProcessResponse { - process_id: String, -} - -#[derive(Debug, Serialize)] -struct AgentCoreProcessBridgeRequest { - #[serde(rename = "type")] - request_type: &'static str, - process_id: String, - request: process_bridge::Request, -} - -#[derive(Debug, Deserialize)] -struct AgentCoreExecResponse { - #[serde(default)] - ok: Option, - #[serde(default)] - exit_code: Option, - #[serde(default)] - stdout: Option, - #[serde(default)] - stderr: Option, - #[serde(default)] - cwd: Option, - #[serde(default)] - error: Option, -} diff --git a/crates/exoharness/src/sandbox_provider/mod.rs b/crates/exoharness/src/sandbox_provider/mod.rs index a876438..15bcd63 100644 --- a/crates/exoharness/src/sandbox_provider/mod.rs +++ b/crates/exoharness/src/sandbox_provider/mod.rs @@ -10,22 +10,6 @@ mod daytona { "daytonaio/sandbox:0.8.0".to_string() } } -#[cfg(all( - not(target_arch = "wasm32"), - feature = "basic-backend", - feature = "aws-agentcore" -))] -mod aws_agentcore; -#[cfg(not(all( - not(target_arch = "wasm32"), - feature = "basic-backend", - feature = "aws-agentcore" -)))] -mod aws_agentcore { - pub fn default_aws_agentcore_image() -> String { - String::new() - } -} #[cfg(all(not(target_arch = "wasm32"), feature = "basic-backend"))] mod process_bridge; #[cfg(all(not(target_arch = "wasm32"), feature = "basic-backend"))] @@ -37,13 +21,6 @@ mod vercel { } } -pub use aws_agentcore::default_aws_agentcore_image; -#[cfg(all( - not(target_arch = "wasm32"), - feature = "basic-backend", - feature = "aws-agentcore" -))] -pub use aws_agentcore::{AwsAgentCoreConfig, AwsAgentCoreCredentials, AwsAgentCoreSandboxBackend}; pub use daytona::default_daytona_image; #[cfg(all(not(target_arch = "wasm32"), feature = "basic-backend"))] pub use daytona::{ diff --git a/crates/exoharness/src/server.rs b/crates/exoharness/src/server.rs index 84eb7a2..cfa0c0e 100644 --- a/crates/exoharness/src/server.rs +++ b/crates/exoharness/src/server.rs @@ -5,8 +5,8 @@ use tokio::io::{AsyncBufReadExt, AsyncRead, AsyncWrite, AsyncWriteExt, BufReader use crate::protocol::{ClientMessage, ConversationHandleInfo, Request, Response, ServerMessage}; use crate::{ - AgentHandle, AgentId, ConversationHandle, ConversationId, ExoHarness, ListConversationsResult, - Result, SessionId, TurnHandle, TurnId, TurnRecord, + AgentHandle, AgentId, ConversationHandle, ConversationId, ExoHarness, Result, SessionId, + TurnHandle, TurnId, TurnRecord, }; pub struct ExoHarnessServer { @@ -63,21 +63,18 @@ impl ExoHarnessServer { Request::GetSecret { secret_id } => Ok(Response::Secret { secret: self.root.get_secret(&secret_id).await?, }), - Request::ListConversations { agent_id, request } => { + Request::ListConversations { agent_id } => { let agent = self.require_agent(&agent_id).await?; - let result = agent.list_conversations(request).await?; Ok(Response::Conversations { - result: ListConversationsResult { - conversations: result - .conversations - .into_iter() - .map(|conversation| ConversationHandleInfo { - agent_id, - record: conversation.record().clone(), - }) - .collect(), - next_cursor: result.next_cursor, - }, + conversations: agent + .list_conversations() + .await? + .into_iter() + .map(|conversation| ConversationHandleInfo { + agent_id, + record: conversation.record().clone(), + }) + .collect(), }) } Request::GetConversation { diff --git a/crates/exoharness/src/types.rs b/crates/exoharness/src/types.rs index 9061519..fd6ce6d 100644 --- a/crates/exoharness/src/types.rs +++ b/crates/exoharness/src/types.rs @@ -37,10 +37,7 @@ pub trait ExoHarness: Send + Sync { pub trait AgentHandle: Send + Sync { fn record(&self) -> &AgentRecord; - async fn list_conversations( - &self, - request: ListConversationsRequest, - ) -> Result>>; + async fn list_conversations(&self) -> Result>>; async fn get_conversation( &self, id: &ConversationId, @@ -162,18 +159,6 @@ pub struct NewConversationRequest { pub name: Option, } -#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)] -pub struct ListConversationsRequest { - pub cursor: Option, - pub limit: Option, -} - -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] -pub struct ListConversationsResult { - pub conversations: Vec, - pub next_cursor: Option, -} - #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] pub struct TurnRecord { pub id: TurnId, @@ -277,6 +262,31 @@ pub struct ForkConversationRequest { pub name: Option, } +#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)] +pub struct UsageRecord { + pub model: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub prompt_tokens: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub completion_tokens: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub prompt_cached_tokens: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub prompt_cache_creation_tokens: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub completion_reasoning_tokens: Option, + /// Cost in USD, computed at call time from the price table baked into + /// this binary. `None` if the model is not in the table. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub cost_usd: Option, + /// Time to first token (streaming only). + #[serde(default, skip_serializing_if = "Option::is_none")] + pub ttft_ms: Option, + /// Wall-clock duration from request start to end of response. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub duration_ms: Option, +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Event { pub id: EventId, @@ -310,6 +320,11 @@ pub enum EventData { Messages { messages: Vec, response_id: Option, + // Boxed to keep `EventData` small: `UsageRecord` is ~170 bytes and + // most events don't carry it, so inlining it would bloat every + // variant (and every enum that embeds `EventData`). + #[serde(default, skip_serializing_if = "Option::is_none")] + usage: Option>, }, ToolRequested { tool_call_id: ToolCallId, @@ -483,8 +498,6 @@ pub enum SandboxProvider { #[default] Daytona, Vercel, - #[serde(rename = "aws_agentcore", alias = "aws-agentcore")] - AwsAgentCore, AppleContainer, Docker, #[serde(alias = "local")] @@ -503,7 +516,6 @@ impl SandboxProvider { match self { Self::Daytona => "daytona", Self::Vercel => "vercel", - Self::AwsAgentCore => "aws-agentcore", Self::AppleContainer => "apple-container", Self::Docker => "docker", Self::LocalProcess => "local-process", @@ -524,7 +536,6 @@ impl FromStr for SandboxProvider { match value.trim() { "daytona" => Ok(Self::Daytona), "vercel" => Ok(Self::Vercel), - "aws-agentcore" | "aws_agentcore" => Ok(Self::AwsAgentCore), "apple-container" | "apple_container" => Ok(Self::AppleContainer), "docker" => Ok(Self::Docker), "local" | "local-process" | "local_process" => Ok(Self::LocalProcess), @@ -795,17 +806,6 @@ pub enum SandboxProviderConfig { #[serde(default = "crate::sandbox_provider::default_vercel_image")] default_image: String, }, - #[serde(rename = "aws_agentcore", alias = "aws-agentcore")] - AwsAgentCore { - runtime_arn: String, - region: String, - #[serde(default, skip_serializing_if = "Option::is_none")] - qualifier: Option, - #[serde(default, skip_serializing_if = "Option::is_none")] - endpoint_url: Option, - #[serde(default = "crate::sandbox_provider::default_aws_agentcore_image")] - default_image: String, - }, } impl SandboxProviderConfig { @@ -814,7 +814,6 @@ impl SandboxProviderConfig { Self::Daytona { .. } => SandboxProvider::Daytona, Self::Vercel { .. } => SandboxProvider::Vercel, Self::Docker { .. } => SandboxProvider::Docker, - Self::AwsAgentCore { .. } => SandboxProvider::AwsAgentCore, } } @@ -823,8 +822,7 @@ impl SandboxProviderConfig { match self { Self::Daytona { default_image, .. } | Self::Vercel { default_image, .. } - | Self::Docker { default_image, .. } - | Self::AwsAgentCore { default_image, .. } => default_image, + | Self::Docker { default_image, .. } => default_image, } } } @@ -882,6 +880,51 @@ mod tests { ); } + #[test] + fn messages_event_deserializes_without_usage_field() { + // Older logs predate per-message cost tracking and have no `usage` + // field on Messages events; serde(default) must keep them readable. + let json = serde_json::json!({ + "type": "messages", + "messages": [], + "response_id": null, + }); + let event: EventData = serde_json::from_value(json).expect("legacy event should parse"); + match event { + EventData::Messages { usage, .. } => assert!(usage.is_none()), + _ => panic!("expected Messages variant"), + } + } + + #[test] + fn messages_event_serializes_usage_when_present() { + let event = EventData::Messages { + messages: vec![], + response_id: None, + usage: Some(Box::new(UsageRecord { + model: "claude-sonnet-4-6".to_string(), + prompt_tokens: Some(100), + completion_tokens: Some(50), + cost_usd: Some(0.001), + ..Default::default() + })), + }; + let value = serde_json::to_value(&event).expect("event should serialize"); + let usage = value.get("usage").expect("usage field present"); + assert_eq!( + usage.get("model").and_then(Value::as_str), + Some("claude-sonnet-4-6") + ); + assert_eq!( + usage.get("prompt_tokens").and_then(Value::as_i64), + Some(100) + ); + assert!((usage.get("cost_usd").and_then(Value::as_f64).unwrap() - 0.001).abs() < 1e-9); + // Round-trip back through the legacy-event parser + let parsed: EventData = serde_json::from_value(value).expect("round-trip parse"); + assert!(matches!(parsed, EventData::Messages { usage: Some(_), .. })); + } + #[test] fn serializes_mount_modes_as_ro_rw() { let ro = @@ -910,13 +953,11 @@ mod tests { "vercel".parse::().unwrap(), SandboxProvider::Vercel ); - assert!("agentcore".parse::().is_err()); assert_eq!( SandboxProvider::AppleContainer.to_string(), "apple-container" ); assert_eq!(SandboxProvider::Vercel.to_string(), "vercel"); - assert_eq!(SandboxProvider::AwsAgentCore.to_string(), "aws-agentcore"); assert_eq!(SandboxProvider::LocalProcess.to_string(), "local-process"); } } diff --git a/docs/cost-tracking-design.md b/docs/cost-tracking-design.md new file mode 100644 index 0000000..57ff2b9 --- /dev/null +++ b/docs/cost-tracking-design.md @@ -0,0 +1,180 @@ +# Per-Message Cost, Token, and Latency Tracking + +Every LLM response in exo carries a durable, per-message record of token usage +and timing in the canonical event log. **Dollar cost is a policy computed in +userspace, not by the trusted substrate** — the store persists whatever numbers +the event carries. Cost computation is an agent-side policy, so should not live +in the exoharness. + +This document describes what is recorded, who computes cost and where, and the +reasoning behind that split. + +## Design Principles + +- **Cost is policy, and policy lives in userspace.** How to turn tokens into + dollars (which price table, which provider formula, when to compute) is an + application decision, not a substrate responsibility. To not force each substrate + to reimplement, we provide a standalone cost library that executors call. +- **The substrate stays minimal.** `exoharness` stores the usage record verbatim, + including an optional `cost_usd` field, but contains no pricing code and never + computes, validates, or owns cost. It round-trips bytes. +- **Usage is agent-reported telemetry, not an attested ledger.** The token counts + come from the provider response, which for the TypeScript/exoclaw path is read + _inside the agent-side harness process_ (the model call is not made by the + trusted Rust core — there is no model-completion runtime request). So usage is + self-reported wherever cost is later computed. The implication is that it cannot + be fully trustworthy: the agent owns its own calls so can report whatever numbers + it wants. +- **A maintained data source, not a hand-rolled table.** Prices change often and + across many providers, so the _data_ is the community-maintained LiteLLM price + database — one source of truth, shared by every consumer of the cost library. +- **Backward and forward compatible.** The usage record and all sub-fields are + optional; legacy events without `usage` still deserialize, and `cost_usd` may + simply be null when no policy filled it. + +## What Is Recorded + +`EventData::Messages` gains an optional `usage` field holding a `UsageRecord` +(the type is event schema and lives in `exoharness`; the store treats it as data): + +| Field | Meaning | +| ------------------------------ | ---------------------------------------------------------------------- | +| `model` | Model id echoed by the provider, falling back to the requested binding | +| `prompt_tokens` | Input tokens (provider convention varies — see per-provider math) | +| `completion_tokens` | Output tokens | +| `prompt_cached_tokens` | Cache-read (discounted) input tokens | +| `prompt_cache_creation_tokens` | Cache-write input tokens (Anthropic) | +| `completion_reasoning_tokens` | Reasoning tokens, when surfaced | +| `cost_usd` | USD cost filled by userspace policy; null if no policy computed it | +| `ttft_ms` | Time to first token (streaming path only) | +| `duration_ms` | Wall-clock duration, request start to end of response | + +Every sub-field is `Option` with `skip_serializing_if`. The record is boxed +inside `EventData` (`Option>`) to keep the enum small and dodge +`large_enum_variant`; `Box` is serde-transparent, so the on-disk JSON is +unchanged. The two cache buckets are both kept because reads and writes bill at +different rates — cost can't be reconstructed from a single collapsed number. +They mirror lingua's `UniversalUsage` field names. + +## Layering: Who Computes Cost + +``` +TRUSTED SUBSTRATE USERSPACE (agent-side) +───────────────── ────────────────────── +exoharness store Rust executors (Basic/RLM) ─┐ + • persists UsageRecord verbatim ├─ cost library + • raw tokens + model + timing TS harness / exoclaw ───────┘ (policy) + • cost_usd is just a field; │ + NOTHING here computes it └─ fills cost_usd when building the + • no pricing dependency messages event (or leaves it null + for a reader to compute later) +``` + +- The emitter (whoever made the call and holds the provider usage) builds the + `UsageRecord` and, as a policy choice, fills `cost_usd` by calling the cost + library. Rust executors call the Rust crate; the TS harness calls the TS port. +- The substrate stores the record as-is. It has no pricing code and no dependency + on the cost library. +- _When_ cost is computed is an application choice: at write (emitter fills it) or + left null for a read-time/report path. The substrate is agnostic either way. + +## The Cost Library + +There are two userspaces (Rust executors, the TS harness), so the cost library +exists once per language. Each is **self-contained** — it owns both the math and +its own data loading, and neither depends on the other having run: + +- **Rust:** a `cost` crate (price table, lookup, `compute_cost_usd`, loader), used + by the Basic executor. The CLI loads the table once at startup (`--pricing-path` + / `--pricing-url`, `EXO_LITELLM_PRICES_*` as env) and injects it. `exoharness` + does not depend on it. +- **TypeScript:** a self-contained port (`@exo/model-runtime/cost`) used by the + exoclaw harness. It loads its own data through the harness's normal config flow + — reading `EXO_LITELLM_PRICES_PATH` from its inherited env, then its own cache, + then its own fetch — and computes cost when building the messages event. It does + **not** depend on the Rust loader having populated anything. + +Only the price _rates_ are a single source of truth (the upstream LiteLLM JSON); +the short per-provider formula and the loader logic are duplicated per language. +That duplication is the accepted cost of keeping cost a per-userspace policy +rather than a substrate service, and of each userspace owning its own data so +there is no cross-boundary coupling. + +### Loading the data + +Each loader resolves the table once and holds it as plain data, in this order: +explicit `EXO_LITELLM_PRICES_PATH` (or `--pricing-path` on the CLI) → fresh +on-disk cache (`$XDG_CACHE_HOME/exo/litellm_prices.json`, 24h TTL) → HTTP fetch +(`EXO_LITELLM_PRICES_URL` or the LiteLLM default, cached on success) → stale cache +→ none. A corrupt cache or unparseable fetch degrades to no table (cost stays +null, tokens persist); the cache is written only when a fetched body parses. The +two loaders share the same cache _path_ by convention, but neither requires the +other to have written it. + +### Per-provider cost math + +Providers disagree on what `prompt_tokens` _includes_, and getting it wrong +distorts cost by up to ~10× on cache-heavy requests: + +- **Anthropic-family** (`anthropic`, `vertex_ai-anthropic`, `azure_ai`): + `prompt_tokens` is **fresh** input only; cache reads and writes are separate and + additive — `prompt·in + cached·cache_read + cache_creation·cache_write + completion·out`. +- **Everything else** (`openai`, `mistral`, Bedrock, …): `prompt_tokens` is the + **total** input; cached is a subset to subtract first — + `(prompt − cached)·in + cached·cache_read + completion·out`. + +The additive set is a narrow list of first-party Anthropic providers; everything +else, including all of Bedrock, is treated as inclusive. Bedrock is not fully +handled — Bedrock-hosted Claude is really additive, so its cache-heavy costs are +off. Left as a TODO. Unclassified providers default to inclusive (safe when +`cached == 0`, the common case). + +### Model lookup + +Exact match first, then the longest entry key that is a prefix of the requested +model **at a token boundary** — the next character must be absent or a separator +(`-` / `:`). This resolves dated revisions (`claude-sonnet-4-6-20251022` → +`claude-sonnet-4-6`) while refusing to slide `gpt-4o-mini` onto a `gpt-4` entry +when `gpt-4o` is missing, so a model is never silently priced at a neighbor's +rate. + +## Testing + +- Cost library (Rust) unit tests: Anthropic additive (no cache / hits / creation), + inclusive (with/without cache, subtraction asserted), boundary-aware lookup + (incl. the `gpt-4o-mini` vs `gpt-4` non-match), `sample_spec` skip, + unknown-model null, provider classification (Bedrock → inclusive), and the + corrupt-cache-degrades-to-empty loader path. +- TS cost port: a parity unit test over the same fixtures so the two formulas + stay in agreement. +- Executors: assert the persisted `Messages` event carries the expected cost for + the Anthropic and inclusive paths, through a Rust executor and through the + exoclaw/TS path, so coverage is pinned on both userspaces. +- `server_duration_ms` removed; the legacy-no-`usage` backward-compat test stays. + +## Not in Scope (Intentional) + +- **Trusted/attested usage.** Usage is agent-reported (see principles); making it + tamper-evident means routing model calls through the Rust core — a separate + change. +- **`/cost` REPL surface.** Data is persisted; the UI is a follow-up. (It is one + natural read-time consumer of the cost library.) +- **Cache-tier resolution.** lingua's `UniversalUsage` collapses Anthropic's + 5-minute and 1-hour cache writes, so cost uses the 5-minute rate; + `cache_creation_input_token_cost_above_1hr` is parsed but unused. +- **Long-running staleness.** The data loads once at startup; a long-lived service + would need a periodic refresh. +- **Anthropic aggregate Admin API.** Org-level, aggregate-only; can't be tied to a + message. + +## Resolved Review Decisions + +All three review points are folded in: + +1. **Prefix-match boundary** (Alexsun1one) — adopted; lookup is boundary-aware. +2. **Bedrock classification** (Alexsun1one) — Bedrock is treated as inclusive for + now; correct additive handling for Bedrock-hosted Claude is left as a TODO. +3. **Env vars through clap** (ankrgyl) — the cost library loads at startup, so its + source is a CLI/clap-parsed input threaded in, with the `EXO_LITELLM_PRICES_*` + env vars kept as overrides. (Writing the "env vars via clap" convention up as a + repo skill remains a separate housekeeping task.) diff --git a/examples/typescript/turn-loop.ts b/examples/typescript/turn-loop.ts index 79a175a..d0f5d22 100644 --- a/examples/typescript/turn-loop.ts +++ b/examples/typescript/turn-loop.ts @@ -19,6 +19,7 @@ import { type ResponsesRuntimeLike, type TraceParent, } from "@exo/model-runtime/responses"; +import { ensureTable } from "@exo/model-runtime/cost"; import { resolveLlmBinding } from "./shared"; @@ -34,6 +35,7 @@ export async function runResponsesHarnessTurn( context: TurnContext, options: ResponsesTurnLoopOptions = {}, ): Promise { + await ensureTable(); // load the price table once so cost is ready when events are built const modelBinding = await resolveLlmBinding(context); const runtime = runtimeFromModelBinding(context.agentConfig, modelBinding); await runtime.runTurn(context, (turnParent) => diff --git a/tsconfig.json b/tsconfig.json index 452c477..1d14f24 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -16,6 +16,7 @@ "@exo/model-runtime/responses": [ "./typescript/model-runtime/responses.ts" ], + "@exo/model-runtime/cost": ["./typescript/model-runtime/cost.ts"], "@exo/codex/app-server": ["./typescript/codex/app-server.ts"], "@exo/cursor/sdk": ["./typescript/cursor/sdk.ts"], "@exo/cursor/protocol": ["./typescript/cursor/protocol.ts"] diff --git a/typescript/harness/index.ts b/typescript/harness/index.ts index 45847f6..2261299 100644 --- a/typescript/harness/index.ts +++ b/typescript/harness/index.ts @@ -437,11 +437,13 @@ export function assistantTextMessage(text: string): Message { export function messagesEvent( messages: Message[], responseId?: string, + usage?: JsonObject, ): EventData { return { type: "messages", messages, response_id: responseId, + ...(usage ? { usage } : {}), }; } diff --git a/typescript/model-runtime/cost.test.ts b/typescript/model-runtime/cost.test.ts new file mode 100644 index 0000000..a31b751 --- /dev/null +++ b/typescript/model-runtime/cost.test.ts @@ -0,0 +1,75 @@ +import { describe, expect, it } from "vitest"; + +import { computeCostUsd, lookup, parseTable } from "./cost"; + +const FIXTURE = `{ + "sample_spec": { "comment": "ignored" }, + "claude-sonnet-4-6": { + "litellm_provider": "anthropic", "input_cost_per_token": 3e-06, + "output_cost_per_token": 1.5e-05, "cache_read_input_token_cost": 3e-07, + "cache_creation_input_token_cost": 3.75e-06 + }, + "gpt-4o-mini": { + "litellm_provider": "openai", "input_cost_per_token": 1.5e-07, + "output_cost_per_token": 6e-07, "cache_read_input_token_cost": 7.5e-08 + }, + "gpt-4": { "litellm_provider": "openai", "input_cost_per_token": 3e-05 }, + "us.anthropic.claude-sonnet-4-6": { + "litellm_provider": "bedrock_converse", "input_cost_per_token": 3.3e-06, + "output_cost_per_token": 1.65e-05, "cache_read_input_token_cost": 3.3e-07 + } +}`; + +const table = parseTable(FIXTURE); + +describe("cost", () => { + it("skips sample_spec", () => { + expect(table.size).toBe(4); + }); + + it("resolves dated revisions, not neighbors", () => { + expect(lookup(table, "claude-sonnet-4-6-20251022")?.litellm_provider).toBe( + "anthropic", + ); + expect(lookup(table, "gpt-4o")).toBeUndefined(); + expect(lookup(table, "gpt-4-0613")).toBeDefined(); + }); + + it("bills Anthropic additively (prompt excludes cached)", () => { + // 500 fresh + 10k read + 200 out = 0.0015 + 0.003 + 0.003 + expect( + computeCostUsd(table, "claude-sonnet-4-6", { + prompt: 500, + completion: 200, + cached: 10_000, + }), + ).toBeCloseTo(0.0075, 12); + }); + + it("bills OpenAI inclusively (subtract cached)", () => { + expect( + computeCostUsd(table, "gpt-4o-mini", { + prompt: 2_000, + completion: 1_000, + cached: 500, + }), + ).toBeCloseTo(0.0008625, 12); + }); + + it("treats Bedrock as inclusive, not additive", () => { + const expected = 1_500 * 3.3e-6 + 500 * 3.3e-7 + 1_000 * 1.65e-5; + expect( + computeCostUsd(table, "us.anthropic.claude-sonnet-4-6", { + prompt: 2_000, + completion: 1_000, + cached: 500, + }), + ).toBeCloseTo(expected, 12); + }); + + it("returns null for unknown models", () => { + expect( + computeCostUsd(table, "acme-llm-9000", { prompt: 100, completion: 50 }), + ).toBeNull(); + }); +}); diff --git a/typescript/model-runtime/cost.ts b/typescript/model-runtime/cost.ts new file mode 100644 index 0000000..1ab3c00 --- /dev/null +++ b/typescript/model-runtime/cost.ts @@ -0,0 +1,184 @@ +// Cost policy for the TypeScript harness: a self-contained port of the `cost` +// crate's math and loader. It owns its own price data (env override, on-disk +// cache, or its own fetch) and does not depend on the Rust loader having run. + +import { mkdirSync, readFileSync, statSync, writeFileSync } from "node:fs"; +import { dirname } from "node:path"; + +export type ModelEntry = { + litellm_provider?: string; + input_cost_per_token?: number; + output_cost_per_token?: number; + cache_read_input_token_cost?: number; + cache_creation_input_token_cost?: number; +}; + +export type PricingTable = Map; + +export type TokenCounts = { + prompt?: number; + completion?: number; + cached?: number; + cacheCreation?: number; +}; + +export function parseTable(json: string): PricingTable { + const raw = JSON.parse(json) as Record; + const table: PricingTable = new Map(); + for (const [key, value] of Object.entries(raw)) { + if (key !== "sample_spec" && value !== null && typeof value === "object") { + table.set(key, value as ModelEntry); + } + } + return table; +} + +// Exact match, else the longest key that is a prefix of `model` at a token +// boundary (next char absent or `-`/`:`). +export function lookup( + table: PricingTable, + model: string, +): ModelEntry | undefined { + const exact = table.get(model); + if (exact) return exact; + let best: ModelEntry | undefined; + let bestLen = -1; + for (const [key, entry] of table) { + const next = model[key.length]; + if ( + key.length > bestLen && + model.startsWith(key) && + (next === undefined || next === "-" || next === ":") + ) { + best = entry; + bestLen = key.length; + } + } + return best; +} + +export function computeCostUsd( + table: PricingTable, + model: string, + tokens: TokenCounts, +): number | null { + const entry = lookup(table, model); + if (!entry || entry.input_cost_per_token == null) return null; + const input = entry.input_cost_per_token; + const output = entry.output_cost_per_token ?? 0; + const cacheRead = entry.cache_read_input_token_cost ?? input; + const cacheWrite = entry.cache_creation_input_token_cost ?? input; + + const prompt = Math.max(0, tokens.prompt ?? 0); + const completion = Math.max(0, tokens.completion ?? 0); + const cached = Math.max(0, tokens.cached ?? 0); + const created = Math.max(0, tokens.cacheCreation ?? 0); + + // Anthropic-family `prompt_tokens` excludes cached (additive); else inclusive. + const fresh = isAdditive(entry.litellm_provider) + ? prompt + : Math.max(0, prompt - cached); + return ( + fresh * input + + cached * cacheRead + + created * cacheWrite + + completion * output + ); +} + +function isAdditive(provider?: string): boolean { + return ( + provider != null && + (provider.startsWith("anthropic") || + provider.startsWith("vertex_ai-anthropic") || + provider === "azure_ai") + ); +} + +const DEFAULT_URL = + "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"; +const CACHE_TTL_MS = 24 * 60 * 60 * 1000; +const FETCH_TIMEOUT_MS = 5000; + +let table: PricingTable | null | undefined; +let loading: Promise | undefined; + +// The in-memory table once loaded; null until ensureTable() has resolved. +export function getTable(): PricingTable | null { + return table ?? null; +} + +// Loads the table once (memoized), owning its own data: env override -> fresh +// cache -> own fetch (cached) -> stale cache -> null. Independent of Rust. +export function ensureTable(): Promise { + loading ??= load().then((loaded) => (table = loaded)); + return loading; +} + +async function load(): Promise { + const override = process.env.EXO_LITELLM_PRICES_PATH; + if (override) return readTable(override); + + const cache = cachePath(); + if (cache && isFresh(cache)) { + const fresh = readTable(cache); + if (fresh) return fresh; + } + const fetched = await fetchTable( + process.env.EXO_LITELLM_PRICES_URL ?? DEFAULT_URL, + ); + if (fetched) { + if (cache) writeCache(cache, fetched.body); + return fetched.table; + } + return cache ? readTable(cache) : null; // stale cache fallback +} + +async function fetchTable( + url: string, +): Promise<{ table: PricingTable; body: string } | null> { + const controller = new AbortController(); + const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS); + try { + const response = await fetch(url, { signal: controller.signal }); + if (!response.ok) return null; + const body = await response.text(); + return { table: parseTable(body), body }; + } catch { + return null; // network/parse failure -> fall back to cache or null + } finally { + clearTimeout(timer); + } +} + +function readTable(path: string): PricingTable | null { + try { + return parseTable(readFileSync(path, "utf8")); + } catch { + return null; // missing or unparseable -> no table + } +} + +function isFresh(path: string): boolean { + try { + return Date.now() - statSync(path).mtimeMs < CACHE_TTL_MS; + } catch { + return false; + } +} + +function writeCache(path: string, body: string): void { + try { + mkdirSync(dirname(path), { recursive: true }); + writeFileSync(path, body); + } catch { + // best effort; a missing cache just means we re-fetch next time + } +} + +function cachePath(): string | null { + const base = + process.env.XDG_CACHE_HOME ?? + (process.env.HOME ? `${process.env.HOME}/.cache` : null); + return base ? `${base}/exo/litellm_prices.json` : null; +} diff --git a/typescript/model-runtime/responses.ts b/typescript/model-runtime/responses.ts index 6ac23ab..c761adf 100644 --- a/typescript/model-runtime/responses.ts +++ b/typescript/model-runtime/responses.ts @@ -32,6 +32,7 @@ import { type ToolDefinition, type TurnContext, } from "../harness"; +import { computeCostUsd, getTable } from "./cost"; import type { ChatCompletion, ChatCompletionChunk, @@ -952,7 +953,7 @@ export function responseToLinguaEvents(response: Response): EventData[] { const events: EventData[] = []; const messages = responseMessages(response); if (messages.length > 0) { - events.push(messagesEvent(messages)); + events.push(messagesEvent(messages, undefined, usageRecord(response))); } for (const result of responseToolCallResults(response)) { if (result.type === "tool_call") { @@ -969,6 +970,29 @@ export function responseToLinguaEvents(response: Response): EventData[] { return events; } +// Policy: attach raw usage + cost to the messages event. cost_usd is filled from +// the shared price cache; left unset if the cache is unavailable. +function usageRecord(response: Response): JsonObject | undefined { + const usage = response.usage; + if (!usage) return undefined; + const prompt = usage.input_tokens; + const completion = usage.output_tokens; + const cached = usage.input_tokens_details?.cached_tokens; + const reasoning = usage.output_tokens_details?.reasoning_tokens; + const table = getTable(); + const cost = table + ? computeCostUsd(table, response.model, { prompt, completion, cached }) + : null; + + const record: JsonObject = { model: response.model }; + if (prompt != null) record.prompt_tokens = prompt; + if (completion != null) record.completion_tokens = completion; + if (cached != null) record.prompt_cached_tokens = cached; + if (reasoning != null) record.completion_reasoning_tokens = reasoning; + if (cost != null) record.cost_usd = cost; + return record; +} + export function responseStreamEventToLinguaEvents( event: ResponseStreamEvent, ): EventData[] {