diff --git a/Cargo.lock b/Cargo.lock
index 7e0f8ed..8064e70 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -50,7 +50,7 @@ dependencies = [
  "percent-encoding",
  "pin-project-lite",
  "rand 0.10.1",
- "sha1 0.11.0",
+ "sha1",
  "smallvec",
  "tokio",
  "tokio-util",
@@ -571,37 +571,6 @@ version = "1.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
 
-[[package]]
-name = "aws-config"
-version = "1.8.17"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "517aa062d8bd9015ee23d6daa5e1c1372328412fdae4e6c4c1be9b69c6ad37a2"
-dependencies = [
- "aws-credential-types",
- "aws-runtime",
- "aws-sdk-sso",
- "aws-sdk-ssooidc",
- "aws-sdk-sts",
- "aws-smithy-async",
- "aws-smithy-http",
- "aws-smithy-json",
- "aws-smithy-runtime",
- "aws-smithy-runtime-api",
- "aws-smithy-schema",
- "aws-smithy-types",
- "aws-types",
- "bytes",
- "fastrand",
- "hex",
- "http 1.4.0",
- "sha1 0.10.6",
- "time",
- "tokio",
- "tracing",
- "url",
- "zeroize",
-]
-
 [[package]]
 name = "aws-credential-types"
 version = "1.2.14"
@@ -614,28 +583,6 @@ dependencies = [
  "zeroize",
 ]
 
-[[package]]
-name = "aws-lc-rs"
-version = "1.17.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5ec2f1fc3ec205783a5da9a7e6c1509cc69dedf09a1949e412c1e18469326d00"
-dependencies = [
- "aws-lc-sys",
- "zeroize",
-]
-
-[[package]]
-name = "aws-lc-sys"
-version = "0.41.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1a2f9779ce85b93ab6170dd940ad0169b5766ff848247aff13bb788b832fe3f4"
-dependencies = [
- "cc",
- "cmake",
- "dunce",
- "fs_extra",
-]
-
 [[package]]
 name = "aws-runtime"
 version = "1.7.4"
@@ -662,32 +609,6 @@ dependencies = [
  "uuid",
 ]
 
-[[package]]
-name = "aws-sdk-bedrockagentcore"
-version = "1.45.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5b49b793a055a6a745d80328da58e83d14675a5d0b9af32ed491b8b6cc9165f2"
-dependencies = [
- "arc-swap",
- "aws-credential-types",
- "aws-runtime",
- "aws-smithy-async",
- "aws-smithy-eventstream",
- "aws-smithy-http",
- "aws-smithy-json",
- "aws-smithy-observability",
- "aws-smithy-runtime",
- "aws-smithy-runtime-api",
- "aws-smithy-types",
- "aws-types",
- "bytes",
- "fastrand",
- "http 0.2.12",
- "http 1.4.0",
- "regex-lite",
- "tracing",
-]
-
 [[package]]
 name = "aws-sdk-bedrockruntime"
 version = "1.131.0"
@@ -715,80 +636,6 @@ dependencies = [
  "tracing",
 ]
 
-[[package]]
-name = "aws-sdk-sso"
-version = "1.100.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bee2719d4a5e5e147bb9e9b77490df6ece750df1094968aa857b09b618a1881a"
-dependencies = [
- "aws-credential-types",
- "aws-runtime",
- "aws-smithy-async",
- "aws-smithy-http",
- "aws-smithy-json",
- "aws-smithy-observability",
- "aws-smithy-runtime",
- "aws-smithy-runtime-api",
- "aws-smithy-types",
- "aws-types",
- "bytes",
- "fastrand",
- "http 0.2.12",
- "http 1.4.0",
- "regex-lite",
- "tracing",
-]
-
-[[package]]
-name = "aws-sdk-ssooidc"
-version = "1.102.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b30d254992d56ef19f430396e5765b11e0f5bd21a7a557cb12fca1c8c18b9636"
-dependencies = [
- "arc-swap",
- "aws-credential-types",
- "aws-runtime",
- "aws-smithy-async",
- "aws-smithy-http",
- "aws-smithy-json",
- "aws-smithy-observability",
- "aws-smithy-runtime",
- "aws-smithy-runtime-api",
- "aws-smithy-types",
- "aws-types",
- "bytes",
- "fastrand",
- "http 0.2.12",
- "http 1.4.0",
- "regex-lite",
- "tracing",
-]
-
-[[package]]
-name = "aws-sdk-sts"
-version = "1.105.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "59f4f8065fe615dbed9096458ba98dda6d641553ffd5aedd27e37e65211aca9f"
-dependencies = [
- "aws-credential-types",
- "aws-runtime",
- "aws-smithy-async",
- "aws-smithy-http",
- "aws-smithy-json",
- "aws-smithy-observability",
- "aws-smithy-query",
- "aws-smithy-runtime",
- "aws-smithy-runtime-api",
- "aws-smithy-types",
- "aws-smithy-xml",
- "aws-types",
- "fastrand",
- "http 0.2.12",
- "http 1.4.0",
- "regex-lite",
- "tracing",
-]
-
 [[package]]
 name = "aws-sigv4"
 version = "1.4.4"
@@ -856,36 +703,6 @@ dependencies = [
  "tracing",
 ]
 
-[[package]]
-name = "aws-smithy-http-client"
-version = "1.1.12"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6a2f165a7feee6f263028b899d0a181987f4fa7179a6411a32a439fba7c5f769"
-dependencies = [
- "aws-smithy-async",
- "aws-smithy-runtime-api",
- "aws-smithy-types",
- "h2 0.3.27",
- "h2 0.4.13",
- "http 0.2.12",
- "http 1.4.0",
- "http-body 0.4.6",
- "hyper 0.14.32",
- "hyper 1.8.1",
- "hyper-rustls 0.24.2",
- "hyper-rustls 0.27.7",
- "hyper-util",
- "pin-project-lite",
- "rustls 0.21.12",
- "rustls 0.23.37",
- "rustls-native-certs",
- "rustls-pki-types",
- "tokio",
- "tokio-rustls 0.26.4",
- "tower",
- "tracing",
-]
-
 [[package]]
 name = "aws-smithy-json"
 version = "0.62.6"
@@ -906,16 +723,6 @@ dependencies = [
  "aws-smithy-runtime-api",
 ]
 
-[[package]]
-name = "aws-smithy-query"
-version = "0.60.15"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1a56d79744fb3edb5d722ef79d86081e121d3b9422cb209eb03aea6aa4f21ebd"
-dependencies = [
- "aws-smithy-types",
- "urlencoding",
-]
-
 [[package]]
 name = "aws-smithy-runtime"
 version = "1.11.3"
@@ -924,7 +731,6 @@ checksum = "b8e6f5caf6fea86f8c2206541ab5857cfcda9013426cdbe8fa0098b9e2d32182"
 dependencies = [
  "aws-smithy-async",
  "aws-smithy-http",
- "aws-smithy-http-client",
  "aws-smithy-observability",
  "aws-smithy-runtime-api",
  "aws-smithy-schema",
@@ -991,7 +797,6 @@ dependencies = [
  "base64-simd",
  "bytes",
  "bytes-utils",
- "futures-core",
  "http 0.2.12",
  "http 1.4.0",
  "http-body 0.4.6",
@@ -1004,17 +809,6 @@ dependencies = [
  "ryu",
  "serde",
  "time",
- "tokio",
- "tokio-util",
-]
-
-[[package]]
-name = "aws-smithy-xml"
-version = "0.60.15"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0ce02add1aa3677d022f8adf81dcbe3046a95f17a1b1e8979c145cd21d3d22b3"
-dependencies = [
- "xmlparser",
 ]
 
 [[package]]
@@ -1651,15 +1445,6 @@ dependencies = [
  "error-code",
 ]
 
-[[package]]
-name = "cmake"
-version = "0.1.58"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c0f78a02292a74a88ac736019ab962ece0bc380e3f977bf72e376c5d78ff0678"
-dependencies = [
- "cc",
-]
-
 [[package]]
 name = "cmov"
 version = "0.5.3"
@@ -1756,6 +1541,18 @@ version = "0.8.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
 
+[[package]]
+name = "cost"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "reqwest",
+ "serde",
+ "serde_json 1.0.149",
+ "tempfile",
+ "tokio",
+]
+
 [[package]]
 name = "cpubits"
 version = "0.1.0"
@@ -2155,12 +1952,6 @@ dependencies = [
  "syn",
 ]
 
-[[package]]
-name = "dunce"
-version = "1.0.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813"
-
 [[package]]
 name = "dyn-clone"
 version = "1.0.20"
@@ -2347,6 +2138,7 @@ dependencies = [
  "braintrust-llm-router",
  "braintrust-sdk-rust",
  "bytes",
+ "cost",
  "exoharness",
  "futures",
  "lingua",
@@ -2367,6 +2159,7 @@ dependencies = [
  "anyhow",
  "bytes",
  "clap",
+ "cost",
  "executor",
  "exoharness",
  "futures",
@@ -2402,9 +2195,6 @@ dependencies = [
  "aes-gcm 0.11.0-rc.3",
  "anyhow",
  "async-trait",
- "aws-config",
- "aws-credential-types",
- "aws-sdk-bedrockagentcore",
  "base64",
  "bytes",
  "chrono",
@@ -2541,12 +2331,6 @@ dependencies = [
  "percent-encoding",
 ]
 
-[[package]]
-name = "fs_extra"
-version = "1.3.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
-
 [[package]]
 name = "futures"
 version = "0.3.31"
@@ -2972,30 +2756,6 @@ dependencies = [
  "typenum",
 ]
 
-[[package]]
-name = "hyper"
-version = "0.14.32"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7"
-dependencies = [
- "bytes",
- "futures-channel",
- "futures-core",
- "futures-util",
- "h2 0.3.27",
- "http 0.2.12",
- "http-body 0.4.6",
- "httparse",
- "httpdate",
- "itoa",
- "pin-project-lite",
- "socket2 0.5.10",
- "tokio",
- "tower-service",
- "tracing",
- "want",
-]
-
 [[package]]
 name = "hyper"
 version = "1.8.1"
@@ -3019,21 +2779,6 @@ dependencies = [
  "want",
 ]
 
-[[package]]
-name = "hyper-rustls"
-version = "0.24.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590"
-dependencies = [
- "futures-util",
- "http 0.2.12",
- "hyper 0.14.32",
- "log",
- "rustls 0.21.12",
- "tokio",
- "tokio-rustls 0.24.1",
-]
-
 [[package]]
 name = "hyper-rustls"
 version = "0.27.7"
@@ -3041,13 +2786,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58"
 dependencies = [
  "http 1.4.0",
- "hyper 1.8.1",
+ "hyper",
  "hyper-util",
- "rustls 0.23.37",
+ "rustls",
  "rustls-native-certs",
  "rustls-pki-types",
  "tokio",
- "tokio-rustls 0.26.4",
+ "tokio-rustls",
  "tower-service",
  "webpki-roots",
 ]
@@ -3064,7 +2809,7 @@ dependencies = [
  "futures-util",
  "http 1.4.0",
  "http-body 1.0.1",
- "hyper 1.8.1",
+ "hyper",
  "ipnet",
  "libc",
  "percent-encoding",
@@ -4112,7 +3857,7 @@ dependencies = [
  "http-body-util",
  "httparse",
  "humantime",
- "hyper 1.8.1",
+ "hyper",
  "itertools 0.14.0",
  "md-5",
  "parking_lot 0.12.5",
@@ -4616,7 +4361,7 @@ dependencies = [
  "quinn-proto",
  "quinn-udp",
  "rustc-hash 2.1.2",
- "rustls 0.23.37",
+ "rustls",
  "socket2 0.6.3",
  "thiserror 2.0.18",
  "tokio",
@@ -4636,7 +4381,7 @@ dependencies = [
  "rand 0.9.4",
  "ring",
  "rustc-hash 2.1.2",
- "rustls 0.23.37",
+ "rustls",
  "rustls-pki-types",
  "slab",
  "thiserror 2.0.18",
@@ -4872,8 +4617,8 @@ dependencies = [
  "http 1.4.0",
  "http-body 1.0.1",
  "http-body-util",
- "hyper 1.8.1",
- "hyper-rustls 0.27.7",
+ "hyper",
+ "hyper-rustls",
  "hyper-util",
  "js-sys",
  "log",
@@ -4881,7 +4626,7 @@ dependencies = [
  "percent-encoding",
  "pin-project-lite",
  "quinn",
- "rustls 0.23.37",
+ "rustls",
  "rustls-native-certs",
  "rustls-pki-types",
  "serde",
@@ -4889,7 +4634,7 @@ dependencies = [
  "serde_urlencoded",
  "sync_wrapper",
  "tokio",
- "tokio-rustls 0.26.4",
+ "tokio-rustls",
  "tokio-util",
  "tower",
  "tower-http",
@@ -4928,7 +4673,7 @@ dependencies = [
  "futures",
  "getrandom 0.2.17",
  "http 1.4.0",
- "hyper 1.8.1",
+ "hyper",
  "parking_lot 0.11.2",
  "reqwest",
  "reqwest-middleware",
@@ -5093,29 +4838,16 @@ dependencies = [
  "windows-sys 0.61.2",
 ]
 
-[[package]]
-name = "rustls"
-version = "0.21.12"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e"
-dependencies = [
- "log",
- "ring",
- "rustls-webpki 0.101.7",
- "sct",
-]
-
 [[package]]
 name = "rustls"
 version = "0.23.37"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4"
 dependencies = [
- "aws-lc-rs",
  "once_cell",
  "ring",
  "rustls-pki-types",
- "rustls-webpki 0.103.10",
+ "rustls-webpki",
  "subtle",
  "zeroize",
 ]
@@ -5151,23 +4883,12 @@ dependencies = [
  "zeroize",
 ]
 
-[[package]]
-name = "rustls-webpki"
-version = "0.101.7"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765"
-dependencies = [
- "ring",
- "untrusted",
-]
-
 [[package]]
 name = "rustls-webpki"
 version = "0.103.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "df33b2b81ac578cabaf06b89b0631153a3f416b0a886e8a7a1707fb51abbd1ef"
 dependencies = [
- "aws-lc-rs",
  "ring",
  "rustls-pki-types",
  "untrusted",
@@ -5261,16 +4982,6 @@ version = "1.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
 
-[[package]]
-name = "sct"
-version = "0.7.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414"
-dependencies = [
- "ring",
- "untrusted",
-]
-
 [[package]]
 name = "sec1"
 version = "0.7.3"
@@ -5466,17 +5177,6 @@ dependencies = [
  "unsafe-libyaml",
 ]
 
-[[package]]
-name = "sha1"
-version = "0.10.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba"
-dependencies = [
- "cfg-if",
- "cpufeatures 0.2.17",
- "digest 0.10.7",
-]
-
 [[package]]
 name = "sha1"
 version = "0.11.0"
@@ -5916,23 +5616,13 @@ dependencies = [
  "syn",
 ]
 
-[[package]]
-name = "tokio-rustls"
-version = "0.24.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081"
-dependencies = [
- "rustls 0.21.12",
- "tokio",
-]
-
 [[package]]
 name = "tokio-rustls"
 version = "0.26.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61"
 dependencies = [
- "rustls 0.23.37",
+ "rustls",
  "tokio",
 ]
 
@@ -6984,7 +6674,7 @@ dependencies = [
  "futures",
  "http 1.4.0",
  "http-body-util",
- "hyper 1.8.1",
+ "hyper",
  "hyper-util",
  "log",
  "once_cell",
@@ -7101,12 +6791,6 @@ version = "0.6.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9"
 
-[[package]]
-name = "xmlparser"
-version = "0.13.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4"
-
 [[package]]
 name = "yoke"
 version = "0.7.5"
diff --git a/Cargo.toml b/Cargo.toml
index 65970d0..14a4447 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,7 @@
 [workspace]
 members = [
   "crates/cli",
+  "crates/cost",
   "crates/exoharness",
   "crates/executor",
   "examples/exoclaw/scheduler-runner",
@@ -17,9 +18,6 @@ rust-version = "1.95"
 anyhow = "1.0.100"
 async-trait = "0.1.89"
 actix-web = "4.9.0"
-aws-config = "1.8.17"
-aws-credential-types = "1.2.14"
-aws-sdk-bedrockagentcore = "1.45.0"
 braintrust-sdk-rust = { git = "https://github.com/braintrustdata/braintrust-sdk-rust", rev = "55dcefd989b6038edb5ef8dfc438384340d5f4ae" }
 bytes = "1.10.1"
 chrono = { version = "0.4.42", features = ["serde"] }
diff --git a/crates/cli/Cargo.toml b/crates/cli/Cargo.toml
index bb93753..639715f 100644
--- a/crates/cli/Cargo.toml
+++ b/crates/cli/Cargo.toml
@@ -5,10 +5,6 @@ edition.workspace = true
 license.workspace = true
 rust-version.workspace = true
 
-[features]
-default = []
-aws-agentcore = ["executor/aws-agentcore"]
-
 [[bin]]
 name = "exo"
 path = "src/main.rs"
@@ -16,6 +12,7 @@ path = "src/main.rs"
 [dependencies]
 anyhow.workspace = true
 clap.workspace = true
+cost = { path = "../cost" }
 executor = { path = "../executor" }
 lingua.workspace = true
 rustyline = "15.0.0"
diff --git a/crates/cli/src/main.rs b/crates/cli/src/main.rs
index 1dd8424..51a6bef 100644
--- a/crates/cli/src/main.rs
+++ b/crates/cli/src/main.rs
@@ -32,9 +32,9 @@ use executor::{
     LocalSandboxExoHarness, PutSecretRequest, RlmHarness, SANDBOX_MAIN_MOUNT_DIR,
     SandboxBackendChoice, SandboxProvider, SandboxProviderConfig, SandboxScope, Secret,
     SecretBackendChoice, ToolRequest, ToolRuntime, TypeScriptHarness, TypeScriptHarnessConfig,
-    Uuid7, VercelBackendSpec, default_aws_agentcore_image, default_daytona_image,
-    default_docker_image, default_vercel_image, effective_sandbox_scope, load_agent_config,
-    send_conversation_wakeup, serve_exoharness_http_listener_with_options,
+    Uuid7, VercelBackendSpec, default_daytona_image, default_docker_image, default_vercel_image,
+    effective_sandbox_scope, load_agent_config, send_conversation_wakeup,
+    serve_exoharness_http_listener_with_options,
 };
 use lingua::Message;
 use lingua::universal::{AssistantContent, AssistantContentPart, ToolContentPart, UserContent};
@@ -88,6 +88,12 @@ struct Cli {
         value_parser = parse_env_var_name
     )]
     bearer_env: Option<String>,
+    /// Path to a LiteLLM price JSON for cost tracking (overrides fetch/cache).
+    #[arg(long, global = true, env = "EXO_LITELLM_PRICES_PATH")]
+    pricing_path: Option<PathBuf>,
+    /// URL to fetch the LiteLLM price JSON from (overrides the default source).
+    #[arg(long, global = true, env = "EXO_LITELLM_PRICES_URL")]
+    pricing_url: Option<String>,
     #[command(subcommand)]
     command: Commands,
 }
@@ -206,8 +212,6 @@ enum SecretBackendArg {
 enum SandboxProviderArg {
     Daytona,
     Vercel,
-    #[value(name = "aws-agentcore")]
-    AwsAgentCore,
     #[value(name = "apple-container")]
     AppleContainer,
     Docker,
@@ -220,7 +224,6 @@ impl From<SandboxProviderArg> for SandboxProvider {
         match value {
             SandboxProviderArg::Daytona => Self::Daytona,
             SandboxProviderArg::Vercel => Self::Vercel,
-            SandboxProviderArg::AwsAgentCore => Self::AwsAgentCore,
             SandboxProviderArg::AppleContainer => Self::AppleContainer,
             SandboxProviderArg::Docker => Self::Docker,
             SandboxProviderArg::LocalProcess => Self::LocalProcess,
@@ -251,22 +254,9 @@ fn default_sandbox_backends() -> Vec<SandboxBackendChoice> {
         SandboxBackendChoice::LocalProcess,
         SandboxBackendChoice::Daytona(DaytonaBackendSpec::with_conventional_secrets()),
         SandboxBackendChoice::Vercel(VercelBackendSpec::with_conventional_secrets()),
-        SandboxBackendChoice::AwsAgentCore,
     ]
 }
 
-fn agentcore_region_from_arn(runtime_arn: &str) -> Option<String> {
-    let mut parts = runtime_arn.split(':');
-    let arn = parts.next()?;
-    let _partition = parts.next()?;
-    let service = parts.next()?;
-    let region = parts.next()?;
-    if arn == "arn" && service == "bedrock-agentcore" && !region.is_empty() {
-        return Some(region.to_string());
-    }
-    None
-}
-
 #[cfg(target_os = "macos")]
 fn default_secret_backend() -> SecretBackendArg {
     SecretBackendArg::AppleKeychain
@@ -579,35 +569,28 @@ enum ProviderCommands {
     /// List configured sandbox provider bindings.
     List,
     /// Configure a sandbox provider (writes a Binding::Sandbox).
-    Configure(Box<ProviderConfigureArgs>),
-}
-
-#[derive(Debug, Args)]
-struct ProviderConfigureArgs {
-    #[arg(long, value_enum)]
-    provider: SandboxProviderArg,
-    /// Binding name (default: the provider name).
-    #[arg(long)]
-    name: Option<String>,
-    /// Secret (by name) holding the provider's API key/token. Required for Daytona and Vercel.
-    #[arg(long)]
-    secret: Option<String>,
-    /// Region/target. Daytona: us | eu | experimental.
-    #[arg(long)]
-    region: Option<String>,
-    #[arg(long)]
-    organization_id: Option<String>,
-    #[arg(long)]
-    project_id: Option<String>,
-    #[arg(long)]
-    api_url: Option<String>,
-    #[arg(long = "runtime-arn")]
-    runtime_arn: Option<String>,
-    #[arg(long)]
-    qualifier: Option<String>,
-    /// Default base image for sandboxes that don't request one.
-    #[arg(long)]
-    default_image: Option<String>,
+    Configure {
+        #[arg(long, value_enum)]
+        provider: SandboxProviderArg,
+        /// Binding name (default: the provider name).
+        #[arg(long)]
+        name: Option<String>,
+        /// Secret (by name) holding the provider's API key/token. Required for remote providers.
+        #[arg(long)]
+        secret: Option<String>,
+        /// Region/target. Daytona: us | eu | experimental.
+        #[arg(long)]
+        region: Option<String>,
+        #[arg(long)]
+        organization_id: Option<String>,
+        #[arg(long)]
+        project_id: Option<String>,
+        #[arg(long)]
+        api_url: Option<String>,
+        /// Default base image for sandboxes that don't request one.
+        #[arg(long)]
+        default_image: Option<String>,
+    },
 }
 
 #[derive(Debug, Clone, Default, Args)]
@@ -748,6 +731,7 @@ async fn main() -> Result<()> {
         &cli.command,
     )
     .await?;
+    let pricing = Arc::new(cost::load(cli.pricing_path.clone(), cli.pricing_url.clone()).await);
     let harness = instantiate_harness(
         &cli.root,
         &exo_config,
@@ -755,6 +739,7 @@ async fn main() -> Result<()> {
         harness_kind,
         runtime_config.clone(),
         env_vars.clone(),
+        pricing,
     )
     .await?;
 
@@ -1746,21 +1731,17 @@ async fn main() -> Result<()> {
                     }
                 }
             }
-            ProviderCommands::Configure(args) => {
-                let ProviderConfigureArgs {
-                    provider,
-                    name,
-                    secret,
-                    region,
-                    organization_id,
-                    project_id,
-                    api_url,
-                    runtime_arn,
-                    qualifier,
-                    default_image,
-                } = *args;
-                let binding_name =
-                    name.unwrap_or_else(|| SandboxProvider::from(provider).as_str().to_string());
+            ProviderCommands::Configure {
+                provider,
+                name,
+                secret,
+                region,
+                organization_id,
+                project_id,
+                api_url,
+                default_image,
+            } => {
+                let binding_name = name.unwrap_or_else(|| format!("{provider:?}").to_lowercase());
                 let config = match provider {
                     SandboxProviderArg::Daytona => {
                         let secret =
@@ -1796,27 +1777,6 @@ async fn main() -> Result<()> {
                             default_image: default_image.unwrap_or_else(default_vercel_image),
                         }
                     }
-                    SandboxProviderArg::AwsAgentCore => {
-                        let runtime_arn = runtime_arn.ok_or_else(|| {
-                            anyhow!("--runtime-arn is required for aws-agentcore")
-                        })?;
-                        let region = match region {
-                            Some(region) => region,
-                            None => agentcore_region_from_arn(&runtime_arn).ok_or_else(|| {
-                                anyhow!(
-                                    "--region is required when the AgentCore runtime ARN does not include a region"
-                                )
-                            })?,
-                        };
-                        SandboxProviderConfig::AwsAgentCore {
-                            runtime_arn,
-                            region,
-                            qualifier,
-                            endpoint_url: api_url,
-                            default_image: default_image
-                                .unwrap_or_else(default_aws_agentcore_image),
-                        }
-                    }
                     SandboxProviderArg::Docker => SandboxProviderConfig::Docker {
                         default_image: default_image.unwrap_or_else(default_docker_image),
                     },
@@ -2010,12 +1970,14 @@ async fn instantiate_harness(
     kind: HarnessKind,
     runtime_config: Option<BraintrustRuntimeConfig>,
     env_vars: HashMap<String, String>,
+    pricing: Arc<cost::PricingTable>,
 ) -> Result<Arc<dyn Harness>> {
     let harness: Arc<dyn Harness> = match kind {
         HarnessKind::Basic => Arc::new(BasicHarness::from_exoharness(
             exoharness,
             runtime_config,
             env_vars,
+            pricing,
         )),
         HarnessKind::Rlm => Arc::new(RlmHarness::from_exoharness(
             exoharness,
diff --git a/crates/cli/src/tui.rs b/crates/cli/src/tui.rs
index b8eef1e..78bcb18 100644
--- a/crates/cli/src/tui.rs
+++ b/crates/cli/src/tui.rs
@@ -928,6 +928,7 @@ mod tests {
                 ),
             }],
             response_id: None,
+            usage: None,
         });
 
         assert_eq!(rendered.len(), 1);
diff --git a/crates/cost/Cargo.toml b/crates/cost/Cargo.toml
new file mode 100644
index 0000000..7d978a2
--- /dev/null
+++ b/crates/cost/Cargo.toml
@@ -0,0 +1,16 @@
+[package]
+name = "cost"
+edition.workspace = true
+version.workspace = true
+license.workspace = true
+rust-version.workspace = true
+
+[dependencies]
+anyhow.workspace = true
+reqwest.workspace = true
+serde.workspace = true
+serde_json.workspace = true
+
+[dev-dependencies]
+tempfile = "3.23.0"
+tokio = { workspace = true }
diff --git a/crates/cost/src/lib.rs b/crates/cost/src/lib.rs
new file mode 100644
index 0000000..cfb9f71
--- /dev/null
+++ b/crates/cost/src/lib.rs
@@ -0,0 +1,5 @@
+mod loader;
+mod table;
+
+pub use loader::load;
+pub use table::{ModelEntry, PricingTable, TokenCounts};
diff --git a/crates/cost/src/loader.rs b/crates/cost/src/loader.rs
new file mode 100644
index 0000000..8dcf0fa
--- /dev/null
+++ b/crates/cost/src/loader.rs
@@ -0,0 +1,125 @@
+//! Loads the LiteLLM price database. Resolution: explicit path -> fresh cache
+//! -> fetch (cached on success) -> stale cache -> empty. Never fails: any error
+//! degrades to an empty table (cost stays unset, tokens still persist).
+
+use std::path::PathBuf;
+use std::time::Duration;
+
+use crate::PricingTable;
+
+const DEFAULT_URL: &str =
+    "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json";
+const CACHE_TTL: Duration = Duration::from_secs(24 * 60 * 60);
+const FETCH_TIMEOUT: Duration = Duration::from_secs(5);
+
+/// Load the table once at startup. `path`/`url` are caller-supplied (CLI flags or
+/// env), so this stays free of global config reads.
+pub async fn load(path: Option<PathBuf>, url: Option<String>) -> PricingTable {
+    if let Some(path) = path {
+        return match std::fs::read_to_string(&path) {
+            Ok(body) => parse_or_empty(&body),
+            Err(err) => {
+                eprintln!(
+                    "[exo] reading pricing path {}: {err}; cost unavailable",
+                    path.display()
+                );
+                PricingTable::empty()
+            }
+        };
+    }
+
+    let cache = cache_path();
+    let cached = cache.as_ref().and_then(read_cache);
+    if let Some((body, true)) = &cached {
+        return parse_or_empty(body);
+    }
+
+    let url = url.unwrap_or_else(|| DEFAULT_URL.to_string());
+    match fetch(&url).await {
+        Ok(body) if PricingTable::from_json_str(&body).is_ok() => {
+            if let Some(path) = &cache {
+                write_cache(path, &body);
+            }
+            parse_or_empty(&body)
+        }
+        Ok(_) => {
+            eprintln!("[exo] fetched pricing was unparseable; cost unavailable");
+            PricingTable::empty()
+        }
+        Err(err) => match cached {
+            Some((body, _)) => {
+                eprintln!("[exo] pricing fetch failed ({err}); using stale cache");
+                parse_or_empty(&body)
+            }
+            None => {
+                eprintln!("[exo] pricing fetch failed ({err}); cost unavailable");
+                PricingTable::empty()
+            }
+        },
+    }
+}
+
+/// A corrupt or truncated document degrades to an empty table rather than erroring.
+fn parse_or_empty(body: &str) -> PricingTable {
+    PricingTable::from_json_str(body).unwrap_or_else(|_| {
+        eprintln!("[exo] cached pricing is unparseable; cost unavailable until it refreshes");
+        PricingTable::empty()
+    })
+}
+
+async fn fetch(url: &str) -> anyhow::Result<String> {
+    let client = reqwest::Client::builder().timeout(FETCH_TIMEOUT).build()?;
+    Ok(client
+        .get(url)
+        .send()
+        .await?
+        .error_for_status()?
+        .text()
+        .await?)
+}
+
+/// Returns `(body, is_fresh)`; `is_fresh` is true only within `CACHE_TTL`.
+fn read_cache(path: &PathBuf) -> Option<(String, bool)> {
+    let fresh = std::fs::metadata(path)
+        .and_then(|m| m.modified())
+        .ok()
+        .and_then(|t| t.elapsed().ok())
+        .is_some_and(|age| age < CACHE_TTL);
+    Some((std::fs::read_to_string(path).ok()?, fresh))
+}
+
+fn write_cache(path: &PathBuf, body: &str) {
+    if let Some(parent) = path.parent() {
+        let _ = std::fs::create_dir_all(parent);
+    }
+    let _ = std::fs::write(path, body);
+}
+
+fn cache_path() -> Option<PathBuf> {
+    let base = std::env::var_os("XDG_CACHE_HOME")
+        .map(PathBuf::from)
+        .or_else(|| std::env::var_os("HOME").map(|h| PathBuf::from(h).join(".cache")))?;
+    Some(base.join("exo").join("litellm_prices.json"))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::TempDir;
+
+    const FIXTURE: &str = r#"{ "claude-sonnet-4-6": { "litellm_provider": "anthropic", "input_cost_per_token": 3e-06 } }"#;
+
+    #[tokio::test]
+    async fn explicit_path_is_loaded() {
+        let dir = TempDir::new().unwrap();
+        let path = dir.path().join("prices.json");
+        std::fs::write(&path, FIXTURE).unwrap();
+        let table = load(Some(path), None).await;
+        assert!(table.lookup("claude-sonnet-4-6").is_some());
+    }
+
+    #[test]
+    fn corrupt_body_degrades_to_empty() {
+        assert!(parse_or_empty("{ not json").is_empty());
+    }
+}
diff --git a/crates/cost/src/table.rs b/crates/cost/src/table.rs
new file mode 100644
index 0000000..6bda82c
--- /dev/null
+++ b/crates/cost/src/table.rs
@@ -0,0 +1,243 @@
+//! Per-model price data and cost math, parsed from LiteLLM's pricing database.
+//! Pure: no network, no globals.
+
+use std::collections::HashMap;
+
+use serde::Deserialize;
+
+#[derive(Debug, Clone, Deserialize, Default)]
+pub struct ModelEntry {
+    #[serde(default)]
+    pub litellm_provider: Option<String>,
+    #[serde(default)]
+    pub input_cost_per_token: Option<f64>,
+    #[serde(default)]
+    pub output_cost_per_token: Option<f64>,
+    #[serde(default)]
+    pub cache_read_input_token_cost: Option<f64>,
+    #[serde(default)]
+    pub cache_creation_input_token_cost: Option<f64>,
+}
+
+#[derive(Debug, Clone, Copy, Default)]
+pub struct TokenCounts {
+    pub prompt: Option<i64>,
+    pub completion: Option<i64>,
+    pub prompt_cached: Option<i64>,
+    pub prompt_cache_creation: Option<i64>,
+}
+
+#[derive(Debug, Clone, Default)]
+pub struct PricingTable {
+    entries: HashMap<String, ModelEntry>,
+}
+
+impl PricingTable {
+    pub fn empty() -> Self {
+        Self::default()
+    }
+
+    /// Parse a LiteLLM `model_prices_and_context_window.json` document. The
+    /// `sample_spec` doc entry and any entry without per-token rates are skipped.
+    pub fn from_json_str(s: &str) -> anyhow::Result<Self> {
+        let raw: HashMap<String, serde_json::Value> = serde_json::from_str(s)?;
+        let entries = raw
+            .into_iter()
+            .filter(|(key, _)| key != "sample_spec")
+            .filter_map(|(key, value)| {
+                Some((key, serde_json::from_value::<ModelEntry>(value).ok()?))
+            })
+            .collect();
+        Ok(Self { entries })
+    }
+
+    pub fn len(&self) -> usize {
+        self.entries.len()
+    }
+
+    pub fn is_empty(&self) -> bool {
+        self.entries.is_empty()
+    }
+
+    /// Exact match, else the longest entry key that is a prefix of `model` at a
+    /// token boundary (next char absent or `-`/`:`), so dated revisions resolve
+    /// (`claude-sonnet-4-6-20251022` -> `claude-sonnet-4-6`) without sliding
+    /// `gpt-4o-mini` onto a `gpt-4` entry when `gpt-4o` is missing.
+    pub fn lookup(&self, model: &str) -> Option<&ModelEntry> {
+        if let Some(entry) = self.entries.get(model) {
+            return Some(entry);
+        }
+        self.entries
+            .iter()
+            .filter(|(key, _)| {
+                model.starts_with(key.as_str())
+                    && matches!(
+                        model.as_bytes().get(key.len()),
+                        None | Some(b'-') | Some(b':')
+                    )
+            })
+            .max_by_key(|(key, _)| key.len())
+            .map(|(_, entry)| entry)
+    }
+
+    /// USD cost for one call, or `None` if the model is unknown or unpriced.
+    pub fn compute_cost_usd(&self, model: &str, tokens: TokenCounts) -> Option<f64> {
+        let entry = self.lookup(model)?;
+        let input = entry.input_cost_per_token?;
+        let output = entry.output_cost_per_token.unwrap_or(0.0);
+        let cache_read = entry.cache_read_input_token_cost.unwrap_or(input);
+        let cache_write = entry.cache_creation_input_token_cost.unwrap_or(input);
+
+        let prompt = tokens.prompt.unwrap_or(0).max(0) as f64;
+        let completion = tokens.completion.unwrap_or(0).max(0) as f64;
+        let cached = tokens.prompt_cached.unwrap_or(0).max(0) as f64;
+        let created = tokens.prompt_cache_creation.unwrap_or(0).max(0) as f64;
+
+        // Anthropic-family `prompt_tokens` excludes cached (bill additively);
+        // everyone else includes it (subtract before billing fresh input).
+        let fresh = if is_additive(entry.litellm_provider.as_deref()) {
+            prompt
+        } else {
+            (prompt - cached).max(0.0)
+        };
+        Some(fresh * input + cached * cache_read + created * cache_write + completion * output)
+    }
+}
+
+fn is_additive(provider: Option<&str>) -> bool {
+    match provider {
+        Some(p) => {
+            p.starts_with("anthropic") || p.starts_with("vertex_ai-anthropic") || p == "azure_ai"
+        }
+        None => false,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    const FIXTURE: &str = r#"{
+        "sample_spec": { "comment": "ignored" },
+        "claude-sonnet-4-6": {
+            "litellm_provider": "anthropic", "input_cost_per_token": 3e-06,
+            "output_cost_per_token": 1.5e-05, "cache_read_input_token_cost": 3e-07,
+            "cache_creation_input_token_cost": 3.75e-06
+        },
+        "gpt-4o-mini": {
+            "litellm_provider": "openai", "input_cost_per_token": 1.5e-07,
+            "output_cost_per_token": 6e-07, "cache_read_input_token_cost": 7.5e-08
+        },
+        "gpt-4": { "litellm_provider": "openai", "input_cost_per_token": 3e-05, "output_cost_per_token": 6e-05 },
+        "us.anthropic.claude-sonnet-4-6": {
+            "litellm_provider": "bedrock_converse", "input_cost_per_token": 3.3e-06,
+            "output_cost_per_token": 1.65e-05, "cache_read_input_token_cost": 3.3e-07,
+            "cache_creation_input_token_cost": 4.125e-06
+        }
+    }"#;
+
+    fn table() -> PricingTable {
+        PricingTable::from_json_str(FIXTURE).unwrap()
+    }
+
+    fn approx(a: f64, b: f64) {
+        assert!(
+            (a - b).abs() / b.abs().max(1e-12) < 1e-9,
+            "expected {b}, got {a}"
+        );
+    }
+
+    fn counts(prompt: i64, completion: i64, cached: i64, created: i64) -> TokenCounts {
+        TokenCounts {
+            prompt: Some(prompt),
+            completion: Some(completion),
+            prompt_cached: Some(cached),
+            prompt_cache_creation: Some(created),
+        }
+    }
+
+    #[test]
+    fn empty_table_is_none() {
+        assert!(
+            PricingTable::empty()
+                .compute_cost_usd("claude-sonnet-4-6", counts(100, 50, 0, 0))
+                .is_none()
+        );
+    }
+
+    #[test]
+    fn skips_sample_spec() {
+        assert!(table().lookup("sample_spec").is_none());
+        assert_eq!(table().len(), 4);
+    }
+
+    #[test]
+    fn dated_revision_resolves_to_base() {
+        let table = table();
+        let entry = table.lookup("claude-sonnet-4-6-20251022").unwrap();
+        assert_eq!(entry.litellm_provider.as_deref(), Some("anthropic"));
+    }
+
+    #[test]
+    fn prefix_match_respects_token_boundary() {
+        // `gpt-4o` is absent; lookup must NOT fall back to `gpt-4`.
+        assert!(table().lookup("gpt-4o").is_none());
+        // `gpt-4-0613` is a boundary extension of `gpt-4` and should match it.
+        assert!(table().lookup("gpt-4-0613").is_some());
+    }
+
+    #[test]
+    fn anthropic_additive() {
+        // 500 fresh + 10k cache-read + 200 completion (prompt excludes cached).
+        approx(
+            table()
+                .compute_cost_usd("claude-sonnet-4-6", counts(500, 200, 10_000, 0))
+                .unwrap(),
+            0.0075,
+        );
+    }
+
+    #[test]
+    fn anthropic_cache_creation() {
+        approx(
+            table()
+                .compute_cost_usd("claude-sonnet-4-6", counts(0, 100, 0, 5_000))
+                .unwrap(),
+            0.02025,
+        );
+    }
+
+    #[test]
+    fn openai_inclusive_subtracts_cached() {
+        // prompt=2000 includes 500 cached -> 1500 fresh.
+        approx(
+            table()
+                .compute_cost_usd("gpt-4o-mini", counts(2_000, 1_000, 500, 0))
+                .unwrap(),
+            0.0008625,
+        );
+    }
+
+    #[test]
+    fn bedrock_is_inclusive_not_additive() {
+        // prompt=2000 includes 500 cached -> fresh 1500 @ 3.3e-6, cached 500 @ 3.3e-7, 1000 out @ 1.65e-5.
+        approx(
+            table()
+                .compute_cost_usd(
+                    "us.anthropic.claude-sonnet-4-6",
+                    counts(2_000, 1_000, 500, 0),
+                )
+                .unwrap(),
+            1_500.0 * 3.3e-6 + 500.0 * 3.3e-7 + 1_000.0 * 1.65e-5,
+        );
+    }
+
+    #[test]
+    fn unknown_model_is_none() {
+        assert!(
+            table()
+                .compute_cost_usd("acme-llm-9000", counts(100, 50, 0, 0))
+                .is_none()
+        );
+    }
+}
diff --git a/crates/executor/Cargo.toml b/crates/executor/Cargo.toml
index 08fc948..0d8dfb6 100644
--- a/crates/executor/Cargo.toml
+++ b/crates/executor/Cargo.toml
@@ -5,10 +5,6 @@ version.workspace = true
 license.workspace = true
 rust-version.workspace = true
 
-[features]
-default = []
-aws-agentcore = ["exoharness/aws-agentcore"]
-
 [dependencies]
 anyhow.workspace = true
 async-trait.workspace = true
@@ -17,6 +13,7 @@ boa_engine = "0.20.0"
 braintrust-llm-router.workspace = true
 braintrust-sdk-rust.workspace = true
 bytes.workspace = true
+cost = { path = "../cost" }
 exoharness = { path = "../exoharness", features = [
   "apple-keychain",
   "basic-backend",
diff --git a/crates/executor/src/basic.rs b/crates/executor/src/basic.rs
index 29993a9..2412d2c 100644
--- a/crates/executor/src/basic.rs
+++ b/crates/executor/src/basic.rs
@@ -3,9 +3,10 @@ use std::sync::{Arc, RwLock};
 use std::time::Instant;
 
 use async_trait::async_trait;
+use cost::{PricingTable, TokenCounts};
 use exoharness::{
     AgentHandle, ConversationHandle, ConversationId, EventData, EventId, EventKind, EventQuery,
-    EventQueryDirection, Result, ToolCallId, ToolRequest, TurnHandle,
+    EventQueryDirection, Result, ToolCallId, ToolRequest, TurnHandle, UsageRecord,
 };
 use lingua::Message;
 use lingua::universal::{ToolContentPart, ToolResultContentPart};
@@ -24,14 +25,21 @@ pub struct BasicExecutor<M, T> {
     model: Arc<M>,
     tools: Arc<T>,
     history_cache: Arc<RwLock<HashMap<ConversationId, HistoryCacheEntry>>>,
+    pricing: Arc<PricingTable>,
 }
 
 impl<M, T> BasicExecutor<M, T> {
     pub fn new(model: Arc<M>, tools: Arc<T>) -> Self {
+        Self::with_pricing(model, tools, Arc::new(PricingTable::empty()))
+    }
+
+    /// Cost is filled from `pricing`; an empty table leaves `cost_usd` unset.
+    pub fn with_pricing(model: Arc<M>, tools: Arc<T>, pricing: Arc<PricingTable>) -> Self {
         Self {
             model,
             tools,
             history_cache: Arc::new(RwLock::new(HashMap::new())),
+            pricing,
         }
     }
 }
@@ -42,6 +50,7 @@ impl<M, T> Clone for BasicExecutor<M, T> {
             model: Arc::clone(&self.model),
             tools: Arc::clone(&self.tools),
             history_cache: Arc::clone(&self.history_cache),
+            pricing: Arc::clone(&self.pricing),
         }
     }
 }
@@ -133,7 +142,7 @@ where
                 .complete_model_round(request, round as usize, stream_mode, turn_trace)
                 .await?;
 
-            let events = interpret_model_response(response);
+            let events = interpret_model_response(response, &self.pricing);
             turn.add_events(events.clone()).await?;
 
             let tool_requests = collect_tool_requests(&events);
@@ -170,9 +179,11 @@ where
             Some(turn_trace) => turn_trace.start_llm_round(&request, round).await,
             None => None,
         };
+        let requested_model = request.model.clone();
 
         match stream_mode {
             ExecutorStreamMode::Disabled => {
+                let started_at = Instant::now();
                 let response = match self.model.complete(request).await {
                     Ok(response) => response,
                     Err(error) => {
@@ -182,6 +193,14 @@ where
                         return Err(error);
                     }
                 };
+                let duration = started_at.elapsed();
+                let mut response = response;
+                if response.model.is_none() {
+                    response.model = Some(requested_model);
+                }
+                if response.duration.is_none() {
+                    response.duration = Some(duration);
+                }
                 if let Some(llm_trace) = llm_trace {
                     llm_trace.finish_success(&response, None).await;
                 }
@@ -236,6 +255,17 @@ where
                         return Err(error);
                     }
                 };
+                let duration = started_at.elapsed();
+                let mut response = response;
+                if response.model.is_none() {
+                    response.model = Some(requested_model);
+                }
+                if response.ttft.is_none() {
+                    response.ttft = ttft;
+                }
+                if response.duration.is_none() {
+                    response.duration = Some(duration);
+                }
                 if let Some(llm_trace) = llm_trace {
                     llm_trace.finish_success(&response, ttft).await;
                 }
@@ -447,13 +477,15 @@ fn remove_pending_tool_call(pending_tool_call_ids: &mut Vec<ToolCallId>, tool_ca
     }
 }
 
-fn interpret_model_response(response: ModelResponse) -> Vec<EventData> {
+fn interpret_model_response(response: ModelResponse, pricing: &PricingTable) -> Vec<EventData> {
     let mut events = Vec::new();
 
     if !response.messages.is_empty() {
+        let usage = build_usage_record(&response, pricing);
         events.push(EventData::Messages {
             messages: response.messages,
             response_id: response.response_id,
+            usage,
         });
     }
 
@@ -468,6 +500,64 @@ fn interpret_model_response(response: ModelResponse) -> Vec<EventData> {
     events
 }
 
+fn build_usage_record(
+    response: &ModelResponse,
+    pricing: &PricingTable,
+) -> Option<Box<UsageRecord>> {
+    // Only emit a record when we have *something* worth recording — token usage
+    // or timing. Skipping when both are absent keeps event JSON clean for
+    // tests/fakes that don't populate metadata.
+    let has_usage = response.usage.is_some();
+    let has_timing = response.ttft.is_some() || response.duration.is_some();
+    if !has_usage && !has_timing {
+        return None;
+    }
+
+    let model = response.model.clone().unwrap_or_default();
+    let (
+        prompt_tokens,
+        completion_tokens,
+        prompt_cached_tokens,
+        prompt_cache_creation_tokens,
+        completion_reasoning_tokens,
+    ) = match &response.usage {
+        Some(u) => (
+            u.prompt_tokens,
+            u.completion_tokens,
+            u.prompt_cached_tokens,
+            u.prompt_cache_creation_tokens,
+            u.completion_reasoning_tokens,
+        ),
+        None => (None, None, None, None, None),
+    };
+
+    let cost_usd = if has_usage && !model.is_empty() {
+        pricing.compute_cost_usd(
+            &model,
+            TokenCounts {
+                prompt: prompt_tokens,
+                completion: completion_tokens,
+                prompt_cached: prompt_cached_tokens,
+                prompt_cache_creation: prompt_cache_creation_tokens,
+            },
+        )
+    } else {
+        None
+    };
+
+    Some(Box::new(UsageRecord {
+        model,
+        prompt_tokens,
+        completion_tokens,
+        prompt_cached_tokens,
+        prompt_cache_creation_tokens,
+        completion_reasoning_tokens,
+        cost_usd,
+        ttft_ms: response.ttft.map(|d| d.as_millis() as u64),
+        duration_ms: response.duration.map(|d| d.as_millis() as u64),
+    }))
+}
+
 #[derive(Debug, Clone)]
 struct ExecutableToolRequest {
     tool_call_id: String,
diff --git a/crates/executor/src/basic_tests.rs b/crates/executor/src/basic_tests.rs
index a7fd519..e88b219 100644
--- a/crates/executor/src/basic_tests.rs
+++ b/crates/executor/src/basic_tests.rs
@@ -48,6 +48,9 @@ async fn send_appends_user_and_assistant_messages() {
             messages: vec![assistant_message("pong")],
             tool_calls: vec![],
             usage: None,
+            model: None,
+            ttft: None,
+            duration: None,
         }])),
         Arc::new(FakeToolRuntime::default()),
     );
@@ -136,12 +139,18 @@ async fn send_executes_tool_round_trip() {
                 },
             }],
             usage: None,
+            model: None,
+            ttft: None,
+            duration: None,
         },
         ModelResponse {
             response_id: Some(Uuid7::now()),
             messages: vec![assistant_message("done")],
             tool_calls: vec![],
             usage: None,
+            model: None,
+            ttft: None,
+            duration: None,
         },
     ]));
     let executor = BasicExecutor::new(
@@ -249,12 +258,18 @@ async fn send_records_tool_result_when_tool_execution_fails() {
                 },
             }],
             usage: None,
+            model: None,
+            ttft: None,
+            duration: None,
         },
         ModelResponse {
             response_id: Some(Uuid7::now()),
             messages: vec![assistant_message("recovered")],
             tool_calls: vec![],
             usage: None,
+            model: None,
+            ttft: None,
+            duration: None,
         },
     ]));
     let executor = BasicExecutor::new(
@@ -357,6 +372,9 @@ async fn send_stream_emits_chunks_and_persists_final_response() {
                 messages: vec![assistant_message("hello")],
                 tool_calls: vec![],
                 usage: None,
+                model: None,
+                ttft: None,
+                duration: None,
             },
         }])),
         Arc::new(FakeToolRuntime::default()),
@@ -686,18 +704,12 @@ impl AgentHandle for FakeAgentHandle {
         &self.record
     }
 
-    async fn list_conversations(
-        &self,
-        _request: exoharness::ListConversationsRequest,
-    ) -> Result<exoharness::ListConversationsResult<Arc<dyn ConversationHandle>>> {
+    async fn list_conversations(&self) -> Result<Vec<Arc<dyn ConversationHandle>>> {
         let state = self.state.lock().expect("state poisoned");
-        Ok(exoharness::ListConversationsResult {
-            conversations: vec![Arc::new(FakeConversationHandle {
-                state: Arc::clone(&self.state),
-                record: state.conversation.record.clone(),
-            })],
-            next_cursor: None,
-        })
+        Ok(vec![Arc::new(FakeConversationHandle {
+            state: Arc::clone(&self.state),
+            record: state.conversation.record.clone(),
+        })])
     }
 
     async fn get_conversation(
@@ -806,6 +818,7 @@ impl ConversationHandle for FakeConversationHandle {
                 EventData::Messages {
                     messages: request.input,
                     response_id: None,
+                    usage: None,
                 },
             ));
         }
diff --git a/crates/executor/src/executor_types.rs b/crates/executor/src/executor_types.rs
index 68292fc..1592a75 100644
--- a/crates/executor/src/executor_types.rs
+++ b/crates/executor/src/executor_types.rs
@@ -184,6 +184,15 @@ pub struct ModelResponse {
     pub messages: Vec<Message>,
     pub tool_calls: Vec<PendingToolCall>,
     pub usage: Option<UniversalUsage>,
+    /// Model identifier echoed back by the provider, if available.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub model: Option<String>,
+    /// Time to first token (streaming path only).
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub ttft: Option<Duration>,
+    /// Wall-clock duration from request start to end of response.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub duration: Option<Duration>,
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize)]
diff --git a/crates/executor/src/harness_basic.rs b/crates/executor/src/harness_basic.rs
index 51bccfb..e1f339b 100644
--- a/crates/executor/src/harness_basic.rs
+++ b/crates/executor/src/harness_basic.rs
@@ -2,6 +2,7 @@ use std::collections::HashMap;
 use std::sync::Arc;
 
 use crate::{BasicExecutor, BraintrustRuntimeConfig, ModelClient, ToolRuntime};
+use cost::PricingTable;
 use exoharness::{BasicExoHarness, BasicExoHarnessConfig, ExoHarness, Result};
 
 use crate::harness_executor::ExecutorHarnessRuntime;
@@ -24,6 +25,24 @@ impl<M, T> BasicHarness<M, T> {
             inner: SharedHarness::new(exoharness, runtime),
         }
     }
+
+    /// Construct a harness whose executor fills cost from an explicit table.
+    pub fn with_pricing_table(
+        exoharness: Arc<dyn ExoHarness>,
+        model: Arc<M>,
+        tools: Arc<T>,
+        pricing: Arc<PricingTable>,
+    ) -> Self
+    where
+        M: ModelClient + 'static,
+        T: ToolRuntime + 'static,
+    {
+        let runtime =
+            ExecutorHarnessRuntime::new(BasicExecutor::with_pricing(model, tools, pricing), None);
+        Self {
+            inner: SharedHarness::new(exoharness, runtime),
+        }
+    }
 }
 
 impl BasicHarness<RouterModelClient, BasicToolRuntime> {
@@ -31,10 +50,14 @@ impl BasicHarness<RouterModelClient, BasicToolRuntime> {
         exoharness: Arc<dyn ExoHarness>,
         runtime_config: Option<BraintrustRuntimeConfig>,
         env: HashMap<String, String>,
+        pricing: Arc<PricingTable>,
     ) -> Self {
         let model = Arc::new(RouterModelClient::new(env));
         let tools = Arc::new(BasicToolRuntime);
-        let runtime = ExecutorHarnessRuntime::new(BasicExecutor::new(model, tools), runtime_config);
+        let runtime = ExecutorHarnessRuntime::new(
+            BasicExecutor::with_pricing(model, tools, pricing),
+            runtime_config,
+        );
         Self {
             inner: SharedHarness::new(exoharness, runtime),
         }
@@ -49,6 +72,7 @@ impl BasicHarness<RouterModelClient, BasicToolRuntime> {
             Arc::new(BasicExoHarness::new(exo_config).await?),
             runtime_config,
             env,
+            Arc::new(PricingTable::empty()),
         ))
     }
 }
diff --git a/crates/executor/src/harness_basic_tests.rs b/crates/executor/src/harness_basic_tests.rs
index f1c3de1..9c732c4 100644
--- a/crates/executor/src/harness_basic_tests.rs
+++ b/crates/executor/src/harness_basic_tests.rs
@@ -12,7 +12,7 @@ use exoharness::{
     ToolRequest, Uuid7,
 };
 use lingua::universal::{AssistantContent, UserContent};
-use lingua::{Message, UniversalStreamChunk};
+use lingua::{Message, UniversalStreamChunk, UniversalUsage};
 use serde_json::{Map, Value};
 use tempfile::TempDir;
 
@@ -113,6 +113,9 @@ async fn send_persists_messages_through_harness() {
             messages: vec![assistant_message("pong")],
             tool_calls: Vec::new(),
             usage: None,
+            model: None,
+            ttft: None,
+            duration: None,
         }])),
         Arc::new(BasicToolRuntime),
     );
@@ -172,6 +175,390 @@ async fn send_persists_messages_through_harness() {
     );
 }
 
+#[tokio::test(flavor = "current_thread")]
+async fn usage_record_is_persisted_with_computed_cost() {
+    // Use an inline LiteLLM-schema fixture so the assertion is hermetic
+    // and doesn't depend on whatever rates the upstream JSON happens to
+    // ship today.
+    const PRICING_FIXTURE: &str = r#"{
+        "claude-sonnet-4-6": {
+            "litellm_provider": "anthropic",
+            "mode": "chat",
+            "input_cost_per_token": 3e-06,
+            "output_cost_per_token": 1.5e-05,
+            "cache_read_input_token_cost": 3e-07,
+            "cache_creation_input_token_cost": 3.75e-06
+        }
+    }"#;
+    let pricing =
+        Arc::new(cost::PricingTable::from_json_str(PRICING_FIXTURE).expect("fixture should parse"));
+
+    let tempdir = TempDir::new().expect("tempdir should exist");
+    let exoharness = Arc::new(
+        BasicExoHarness::new(local_test_config(tempdir.path().join("exoharness")))
+            .await
+            .expect("basic exoharness should initialize"),
+    ) as Arc<dyn ExoHarness>;
+    let harness = BasicHarness::with_pricing_table(
+        Arc::clone(&exoharness),
+        Arc::new(FakeModelClient::new(vec![ModelResponse {
+            response_id: Some(Uuid7::now()),
+            messages: vec![assistant_message("pong")],
+            tool_calls: Vec::new(),
+            usage: Some(UniversalUsage {
+                prompt_tokens: Some(1_000),
+                completion_tokens: Some(500),
+                prompt_cached_tokens: None,
+                prompt_cache_creation_tokens: None,
+                completion_reasoning_tokens: None,
+            }),
+            model: Some("claude-sonnet-4-6".to_string()),
+            ttft: None,
+            duration: None,
+        }])),
+        Arc::new(BasicToolRuntime),
+        pricing,
+    );
+
+    let secret_id = exoharness
+        .put_secret(PutSecretRequest {
+            name: "cost-test-key".to_string(),
+            secret: Secret::Key {
+                value: "test-key".to_string(),
+            },
+        })
+        .await
+        .expect("test secret should register");
+    exoharness
+        .put_binding(Binding::Llm {
+            name: "claude-sonnet-4-6".to_string(),
+            model: "claude-sonnet-4-6".to_string(),
+            base_url: None,
+            secret_id: Some(secret_id),
+        })
+        .await
+        .expect("binding should register");
+
+    let agent = harness
+        .create_agent(CreateAgentRequest {
+            slug: "cost-demo".to_string(),
+            name: None,
+            harness: crate::AgentHarnessKind::Basic,
+            typescript: None,
+            enable_agent_tool_creation: true,
+            sandbox_image: None,
+            sandbox_provider: SandboxProvider::LocalProcess,
+            enable_networking: false,
+            model: "claude-sonnet-4-6".to_string(),
+            max_output_tokens: None,
+            max_tool_round_trips: Some(2),
+            braintrust: None,
+        })
+        .await
+        .expect("agent should be created");
+    let conversation = agent
+        .create_conversation(CreateConversationRequest::default())
+        .await
+        .expect("conversation should be created");
+
+    conversation
+        .send(SendRequest {
+            input: vec![user_message("ping")],
+            session_id: None,
+        })
+        .await
+        .expect("send should succeed");
+
+    let events = conversation
+        .exoharness_handle()
+        .get_events(Some(EventQuery {
+            cursor: None,
+            direction: Some(EventQueryDirection::Asc),
+            limit: None,
+            session_id: None,
+            turn_id: None,
+            types: None,
+        }))
+        .await
+        .expect("get events should succeed")
+        .events;
+
+    let assistant_usage = events
+        .iter()
+        .find_map(|event| match &event.data {
+            EventData::Messages {
+                messages,
+                usage: Some(usage),
+                ..
+            } if messages
+                .iter()
+                .any(|m| matches!(m, Message::Assistant { .. })) =>
+            {
+                Some(usage)
+            }
+            _ => None,
+        })
+        .expect("assistant message event should carry a UsageRecord");
+
+    assert_eq!(assistant_usage.model, "claude-sonnet-4-6");
+    assert_eq!(assistant_usage.prompt_tokens, Some(1_000));
+    assert_eq!(assistant_usage.completion_tokens, Some(500));
+    // 1000 prompt @ $3/M + 500 completion @ $15/M = $0.003 + $0.0075 = $0.0105
+    let cost = assistant_usage.cost_usd.expect("cost should be computed");
+    assert!(
+        (cost - 0.0105).abs() < 1e-9,
+        "expected cost ~0.0105, got {cost}"
+    );
+    // Non-streaming path measures total duration.
+    assert!(
+        assistant_usage.duration_ms.is_some(),
+        "duration should be recorded"
+    );
+}
+
+#[tokio::test(flavor = "current_thread")]
+async fn usage_record_with_anthropic_cache_hits() {
+    // Anthropic accounting is additive: prompt_tokens is the fresh slice,
+    // and cache_read / cache_creation are billed separately on top. The
+    // pricing math is unit-tested in exoharness::pricing; this test is the
+    // end-to-end proof that cached counts (a) reach the persisted
+    // UsageRecord and (b) hit the discounted cache-read rate when
+    // compute_cost_usd is invoked through the executor.
+    const PRICING_FIXTURE: &str = r#"{
+        "claude-sonnet-4-6": {
+            "litellm_provider": "anthropic",
+            "mode": "chat",
+            "input_cost_per_token": 3e-06,
+            "output_cost_per_token": 1.5e-05,
+            "cache_read_input_token_cost": 3e-07,
+            "cache_creation_input_token_cost": 3.75e-06
+        }
+    }"#;
+    let pricing =
+        Arc::new(cost::PricingTable::from_json_str(PRICING_FIXTURE).expect("fixture should parse"));
+
+    let tempdir = TempDir::new().expect("tempdir should exist");
+    let exoharness = Arc::new(
+        BasicExoHarness::new(local_test_config(tempdir.path().join("exoharness")))
+            .await
+            .expect("basic exoharness should initialize"),
+    ) as Arc<dyn ExoHarness>;
+    let harness = BasicHarness::with_pricing_table(
+        Arc::clone(&exoharness),
+        Arc::new(FakeModelClient::new(vec![ModelResponse {
+            response_id: Some(Uuid7::now()),
+            messages: vec![assistant_message("pong")],
+            tool_calls: Vec::new(),
+            usage: Some(UniversalUsage {
+                prompt_tokens: Some(500),
+                completion_tokens: Some(200),
+                prompt_cached_tokens: Some(10_000),
+                prompt_cache_creation_tokens: Some(2_000),
+                completion_reasoning_tokens: None,
+            }),
+            model: Some("claude-sonnet-4-6".to_string()),
+            ttft: None,
+            duration: None,
+        }])),
+        Arc::new(BasicToolRuntime),
+        pricing,
+    );
+
+    let secret_id = exoharness
+        .put_secret(PutSecretRequest {
+            name: "anthropic-cache-key".to_string(),
+            secret: Secret::Key {
+                value: "test-key".to_string(),
+            },
+        })
+        .await
+        .expect("test secret should register");
+    exoharness
+        .put_binding(Binding::Llm {
+            name: "claude-sonnet-4-6".to_string(),
+            model: "claude-sonnet-4-6".to_string(),
+            base_url: None,
+            secret_id: Some(secret_id),
+        })
+        .await
+        .expect("binding should register");
+
+    let agent = harness
+        .create_agent(CreateAgentRequest {
+            slug: "anthropic-cache".to_string(),
+            name: None,
+            harness: crate::AgentHarnessKind::Basic,
+            typescript: None,
+            enable_agent_tool_creation: true,
+            sandbox_image: None,
+            sandbox_provider: SandboxProvider::LocalProcess,
+            enable_networking: false,
+            model: "claude-sonnet-4-6".to_string(),
+            max_output_tokens: None,
+            max_tool_round_trips: Some(2),
+            braintrust: None,
+        })
+        .await
+        .expect("agent should be created");
+    let conversation = agent
+        .create_conversation(CreateConversationRequest::default())
+        .await
+        .expect("conversation should be created");
+
+    conversation
+        .send(SendRequest {
+            input: vec![user_message("ping")],
+            session_id: None,
+        })
+        .await
+        .expect("send should succeed");
+
+    let usage = assistant_usage_record(&conversation).await;
+
+    assert_eq!(usage.model, "claude-sonnet-4-6");
+    assert_eq!(usage.prompt_tokens, Some(500));
+    assert_eq!(usage.completion_tokens, Some(200));
+    assert_eq!(usage.prompt_cached_tokens, Some(10_000));
+    assert_eq!(usage.prompt_cache_creation_tokens, Some(2_000));
+    // Anthropic-style additive:
+    //   500    fresh prompt    @ $3/M     = 0.0015
+    //   10000  cache read      @ $0.30/M  = 0.003
+    //   2000   cache creation  @ $3.75/M  = 0.0075
+    //   200    completion      @ $15/M    = 0.003
+    // total = 0.015
+    let cost = usage.cost_usd.expect("cost should be computed");
+    assert!(
+        (cost - 0.015).abs() < 1e-9,
+        "expected cost ~0.015, got {cost}"
+    );
+}
+
+#[tokio::test(flavor = "current_thread")]
+async fn usage_record_with_openai_inclusive_accounting() {
+    // OpenAI accounting is inclusive: prompt_tokens is the *total* input
+    // including any cache hits, so the executor must subtract
+    // prompt_cached_tokens before billing the fresh-input rate. Getting
+    // this wrong silently double-bills cached tokens. This test pins the
+    // behavior at the conversation-log level — the same accounting that
+    // pricing.rs exercises in isolation now also has to survive the round
+    // trip through ModelResponse → UsageRecord → persisted event.
+    const PRICING_FIXTURE: &str = r#"{
+        "gpt-4o-mini": {
+            "litellm_provider": "openai",
+            "mode": "chat",
+            "input_cost_per_token": 1.5e-07,
+            "output_cost_per_token": 6e-07,
+            "cache_read_input_token_cost": 7.5e-08
+        }
+    }"#;
+    let pricing =
+        Arc::new(cost::PricingTable::from_json_str(PRICING_FIXTURE).expect("fixture should parse"));
+
+    let tempdir = TempDir::new().expect("tempdir should exist");
+    let exoharness = Arc::new(
+        BasicExoHarness::new(local_test_config(tempdir.path().join("exoharness")))
+            .await
+            .expect("basic exoharness should initialize"),
+    ) as Arc<dyn ExoHarness>;
+    let harness = BasicHarness::with_pricing_table(
+        Arc::clone(&exoharness),
+        Arc::new(FakeModelClient::new(vec![ModelResponse {
+            response_id: Some(Uuid7::now()),
+            messages: vec![assistant_message("pong")],
+            tool_calls: Vec::new(),
+            usage: Some(UniversalUsage {
+                // prompt_tokens here *includes* the 500 cached — OpenAI
+                // convention.
+                prompt_tokens: Some(2_000),
+                completion_tokens: Some(1_000),
+                prompt_cached_tokens: Some(500),
+                prompt_cache_creation_tokens: None,
+                completion_reasoning_tokens: None,
+            }),
+            model: Some("gpt-4o-mini".to_string()),
+            ttft: None,
+            duration: None,
+        }])),
+        Arc::new(BasicToolRuntime),
+        pricing,
+    );
+
+    let secret_id = exoharness
+        .put_secret(PutSecretRequest {
+            name: "openai-cache-key".to_string(),
+            secret: Secret::Key {
+                value: "test-key".to_string(),
+            },
+        })
+        .await
+        .expect("test secret should register");
+    exoharness
+        .put_binding(Binding::Llm {
+            name: "gpt-4o-mini".to_string(),
+            model: "gpt-4o-mini".to_string(),
+            base_url: None,
+            secret_id: Some(secret_id),
+        })
+        .await
+        .expect("binding should register");
+
+    let agent = harness
+        .create_agent(CreateAgentRequest {
+            slug: "openai-cache".to_string(),
+            name: None,
+            harness: crate::AgentHarnessKind::Basic,
+            typescript: None,
+            enable_agent_tool_creation: true,
+            sandbox_image: None,
+            sandbox_provider: SandboxProvider::LocalProcess,
+            enable_networking: false,
+            model: "gpt-4o-mini".to_string(),
+            max_output_tokens: None,
+            max_tool_round_trips: Some(2),
+            braintrust: None,
+        })
+        .await
+        .expect("agent should be created");
+    let conversation = agent
+        .create_conversation(CreateConversationRequest::default())
+        .await
+        .expect("conversation should be created");
+
+    conversation
+        .send(SendRequest {
+            input: vec![user_message("ping")],
+            session_id: None,
+        })
+        .await
+        .expect("send should succeed");
+
+    let usage = assistant_usage_record(&conversation).await;
+
+    assert_eq!(usage.model, "gpt-4o-mini");
+    // Raw counts are preserved as the provider reported them — the
+    // inclusive convention only matters for the cost computation, not for
+    // the stored tokens.
+    assert_eq!(usage.prompt_tokens, Some(2_000));
+    assert_eq!(usage.completion_tokens, Some(1_000));
+    assert_eq!(usage.prompt_cached_tokens, Some(500));
+    // OpenAI-style inclusive:
+    //   non_cached = 2000 - 500       = 1500
+    //   1500   fresh prompt @ $0.15/M = 0.000225
+    //   500    cache read   @ $0.075/M = 0.0000375
+    //   1000   completion   @ $0.60/M = 0.0006
+    // total = 0.0008625
+    // If the executor mistakenly used the Anthropic-style additive
+    // formula here, it would bill all 2000 prompt tokens at the fresh
+    // rate and the total would be 0.0009375 — ~9% high — so this
+    // assertion catches the provider-classification bug the PR
+    // description calls out.
+    let cost = usage.cost_usd.expect("cost should be computed");
+    assert!(
+        (cost - 0.0008625).abs() < 1e-9,
+        "expected cost ~0.0008625, got {cost}"
+    );
+}
+
 #[tokio::test(flavor = "current_thread")]
 async fn close_session_appends_session_ended_event() {
     let tempdir = TempDir::new().expect("tempdir should exist");
@@ -187,6 +574,9 @@ async fn close_session_appends_session_ended_event() {
             messages: vec![assistant_message("pong")],
             tool_calls: Vec::new(),
             usage: None,
+            model: None,
+            ttft: None,
+            duration: None,
         }])),
         Arc::new(BasicToolRuntime),
     );
@@ -262,12 +652,18 @@ async fn updating_agent_config_refreshes_executor_cache() {
             messages: vec![assistant_message("pong-1")],
             tool_calls: Vec::new(),
             usage: None,
+            model: None,
+            ttft: None,
+            duration: None,
         },
         ModelResponse {
             response_id: Some(Uuid7::now()),
             messages: vec![assistant_message("pong-2")],
             tool_calls: Vec::new(),
             usage: None,
+            model: None,
+            ttft: None,
+            duration: None,
         },
     ]));
     let harness = BasicHarness::new(exoharness, Arc::clone(&model), Arc::new(BasicToolRuntime));
@@ -344,12 +740,18 @@ async fn send_executes_shell_tool_when_enabled() {
                 },
             }],
             usage: None,
+            model: None,
+            ttft: None,
+            duration: None,
         },
         ModelResponse {
             response_id: Some(Uuid7::now()),
             messages: vec![assistant_message("done")],
             tool_calls: Vec::new(),
             usage: None,
+            model: None,
+            ttft: None,
+            duration: None,
         },
     ]));
     let harness = BasicHarness::new(exoharness, Arc::clone(&model), Arc::new(BasicToolRuntime));
@@ -483,10 +885,9 @@ async fn harness_exposes_raw_exoharness_handles() {
     assert_eq!(
         agent
             .exoharness_handle()
-            .list_conversations(exoharness::ListConversationsRequest::default())
+            .list_conversations()
             .await
             .expect("list conversations through agent handle")
-            .conversations
             .len(),
         1
     );
@@ -526,12 +927,18 @@ async fn updating_mounts_recreates_conversation_sandbox() {
                 },
             }],
             usage: None,
+            model: None,
+            ttft: None,
+            duration: None,
         },
         ModelResponse {
             response_id: Some(Uuid7::now()),
             messages: vec![assistant_message("done-1")],
             tool_calls: Vec::new(),
             usage: None,
+            model: None,
+            ttft: None,
+            duration: None,
         },
         ModelResponse {
             response_id: Some(Uuid7::now()),
@@ -544,12 +951,18 @@ async fn updating_mounts_recreates_conversation_sandbox() {
                 },
             }],
             usage: None,
+            model: None,
+            ttft: None,
+            duration: None,
         },
         ModelResponse {
             response_id: Some(Uuid7::now()),
             messages: vec![assistant_message("done-2")],
             tool_calls: Vec::new(),
             usage: None,
+            model: None,
+            ttft: None,
+            duration: None,
         },
     ]));
     let harness = BasicHarness::new(exoharness, model, Arc::new(BasicToolRuntime));
@@ -762,18 +1175,27 @@ async fn conversation_model_override_changes_effective_model() {
             messages: vec![assistant_message("first")],
             tool_calls: Vec::new(),
             usage: None,
+            model: None,
+            ttft: None,
+            duration: None,
         },
         ModelResponse {
             response_id: Some(Uuid7::now()),
             messages: vec![assistant_message("second")],
             tool_calls: Vec::new(),
             usage: None,
+            model: None,
+            ttft: None,
+            duration: None,
         },
         ModelResponse {
             response_id: Some(Uuid7::now()),
             messages: vec![assistant_message("third")],
             tool_calls: Vec::new(),
             usage: None,
+            model: None,
+            ttft: None,
+            duration: None,
         },
     ]));
     let harness = BasicHarness::new(exoharness, Arc::clone(&model), Arc::new(BasicToolRuntime));
@@ -925,6 +1347,45 @@ fn assistant_message(text: &str) -> Message {
     }
 }
 
+/// Fetch the UsageRecord attached to the first Messages event that
+/// contains an assistant message. Mirrors the pattern in
+/// `usage_record_is_persisted_with_computed_cost` so the new tests stay
+/// readable.
+async fn assistant_usage_record(
+    conversation: &Arc<dyn crate::HarnessConversation>,
+) -> exoharness::UsageRecord {
+    let events = conversation
+        .exoharness_handle()
+        .get_events(Some(EventQuery {
+            cursor: None,
+            direction: Some(EventQueryDirection::Asc),
+            limit: None,
+            session_id: None,
+            turn_id: None,
+            types: None,
+        }))
+        .await
+        .expect("get events should succeed")
+        .events;
+
+    events
+        .into_iter()
+        .find_map(|event| match event.data {
+            EventData::Messages {
+                messages,
+                usage: Some(usage),
+                ..
+            } if messages
+                .iter()
+                .any(|m| matches!(m, Message::Assistant { .. })) =>
+            {
+                Some(*usage)
+            }
+            _ => None,
+        })
+        .expect("assistant message event should carry a UsageRecord")
+}
+
 fn shell_command_arguments(command: &str) -> Map<String, Value> {
     Map::from_iter([(String::from("command"), Value::String(command.to_string()))])
 }
diff --git a/crates/executor/src/harness_facade.rs b/crates/executor/src/harness_facade.rs
index 71ba9c2..3c18aab 100644
--- a/crates/executor/src/harness_facade.rs
+++ b/crates/executor/src/harness_facade.rs
@@ -196,11 +196,7 @@ where
     }
 
     async fn list_conversations(&self) -> Result<Vec<ConversationRecord>> {
-        let conversations = self
-            .agent
-            .list_conversations(exoharness::ListConversationsRequest::default())
-            .await?
-            .conversations;
+        let conversations = self.agent.list_conversations().await?;
         Ok(conversations
             .into_iter()
             .map(|conversation| conversation.record().clone())
diff --git a/crates/executor/src/harness_helpers.rs b/crates/executor/src/harness_helpers.rs
index d08acb7..835cfec 100644
--- a/crates/executor/src/harness_helpers.rs
+++ b/crates/executor/src/harness_helpers.rs
@@ -68,10 +68,7 @@ pub(crate) async fn resolve_conversation_handle(
         return Ok(Some(conversation));
     }
 
-    let conversations = agent
-        .list_conversations(exoharness::ListConversationsRequest::default())
-        .await?
-        .conversations;
+    let conversations = agent.list_conversations().await?;
     Ok(conversations
         .into_iter()
         .find(|conversation| conversation.record().slug == conversation_ref))
diff --git a/crates/executor/src/harness_runtime.rs b/crates/executor/src/harness_runtime.rs
index 2932294..0017fad 100644
--- a/crates/executor/src/harness_runtime.rs
+++ b/crates/executor/src/harness_runtime.rs
@@ -400,6 +400,9 @@ fn normalize_model_response(response: UniversalResponse) -> Result<ModelResponse
         messages: response.messages,
         tool_calls,
         usage: response.usage,
+        model: response.model,
+        ttft: None,
+        duration: None,
     })
 }
 
diff --git a/crates/executor/src/lib.rs b/crates/executor/src/lib.rs
index b26517c..3d1f85c 100644
--- a/crates/executor/src/lib.rs
+++ b/crates/executor/src/lib.rs
@@ -54,9 +54,9 @@ pub use exoharness::{
     FileSystemMountMode, ForkConversationRequest, HTTP_EXOHARNESS_TRACING_TARGET, HttpExoHarness,
     PutSecretRequest, SANDBOX_MAIN_MOUNT_DIR, SandboxBackendChoice, SandboxId, SandboxProvider,
     SandboxProviderConfig, Secret, SecretBackendChoice, SecretMetadata, SessionId, SnapshotId,
-    StartSandboxRequest, ToolRequest, Uuid7, VercelBackendSpec, default_aws_agentcore_image,
-    default_daytona_image, default_docker_image, default_vercel_image,
-    serve_exoharness_http_listener, serve_exoharness_http_listener_with_options,
+    StartSandboxRequest, ToolRequest, Uuid7, VercelBackendSpec, default_daytona_image,
+    default_docker_image, default_vercel_image, serve_exoharness_http_listener,
+    serve_exoharness_http_listener_with_options,
 };
 pub use harness_basic::BasicHarness;
 pub use harness_config::load_agent_config;
diff --git a/crates/executor/src/local_sandbox.rs b/crates/executor/src/local_sandbox.rs
index 777abe4..85c760a 100644
--- a/crates/executor/src/local_sandbox.rs
+++ b/crates/executor/src/local_sandbox.rs
@@ -7,13 +7,12 @@ use exoharness::{
     AddEventsRequest, AddEventsResult, AgentHandle, AgentId, Artifact, ArtifactVersion, Binding,
     BindingId, BindingRecord, CancelSandboxProcessRequest, CloseSandboxProcessInputRequest,
     ConversationHandle, ConversationId, CreateSandboxRequest, Event, EventData, EventId, EventKind,
-    EventStream, ExoHarness, ForkConversationRequest, GetEventsResult, ListConversationsRequest,
-    ListConversationsResult, NewAgentRequest, NewConversationRequest, PutSecretRequest,
-    ReadArtifactRequest, Result, RunInSandboxRequest, SandboxId, SandboxProcess,
-    SandboxProcessEventQuery, SandboxProcessRecord, SandboxProcessStatus, Secret, SecretId,
-    SecretMetadata, SnapshotId, StartSandboxProcessRequest, StartSandboxRequest, TurnHandle,
-    TurnRecord, Uuid7, WaitSandboxProcessRequest, WriteArtifactRequest,
-    WriteSandboxProcessInputRequest,
+    EventStream, ExoHarness, ForkConversationRequest, GetEventsResult, NewAgentRequest,
+    NewConversationRequest, PutSecretRequest, ReadArtifactRequest, Result, RunInSandboxRequest,
+    SandboxId, SandboxProcess, SandboxProcessEventQuery, SandboxProcessRecord,
+    SandboxProcessStatus, Secret, SecretId, SecretMetadata, SnapshotId, StartSandboxProcessRequest,
+    StartSandboxRequest, TurnHandle, TurnRecord, Uuid7, WaitSandboxProcessRequest,
+    WriteArtifactRequest, WriteSandboxProcessInputRequest,
 };
 use serde::{Deserialize, Serialize};
 use tokio::sync::Mutex;
@@ -134,19 +133,14 @@ impl AgentHandle for LocalSandboxAgent {
         self.remote.record()
     }
 
-    async fn list_conversations(
-        &self,
-        request: ListConversationsRequest,
-    ) -> Result<ListConversationsResult<Arc<dyn ConversationHandle>>> {
-        let result = self.remote.list_conversations(request).await?;
-        Ok(ListConversationsResult {
-            conversations: result
-                .conversations
-                .into_iter()
-                .map(|remote| wrap_conversation(Arc::clone(&self.state), remote))
-                .collect(),
-            next_cursor: result.next_cursor,
-        })
+    async fn list_conversations(&self) -> Result<Vec<Arc<dyn ConversationHandle>>> {
+        Ok(self
+            .remote
+            .list_conversations()
+            .await?
+            .into_iter()
+            .map(|remote| wrap_conversation(Arc::clone(&self.state), remote))
+            .collect())
     }
 
     async fn get_conversation(
@@ -260,9 +254,8 @@ impl LocalSandboxConversation {
 
         let slug = format!("remote-{}", self.remote.record().id);
         let local_conversation = match local_agent
-            .list_conversations(ListConversationsRequest::default())
+            .list_conversations()
             .await?
-            .conversations
             .into_iter()
             .find(|conversation| conversation.record().slug == slug)
         {
diff --git a/crates/executor/src/rlm.rs b/crates/executor/src/rlm.rs
index c8b19f5..6a0fd97 100644
--- a/crates/executor/src/rlm.rs
+++ b/crates/executor/src/rlm.rs
@@ -536,6 +536,7 @@ async fn append_final_answer(
     turn.add_events(vec![EventData::Messages {
         messages: vec![assistant_message(final_answer)],
         response_id,
+        usage: None,
     }])
     .await?;
     append_custom_event(
diff --git a/crates/executor/src/rlm_tests.rs b/crates/executor/src/rlm_tests.rs
index bfddf63..952a828 100644
--- a/crates/executor/src/rlm_tests.rs
+++ b/crates/executor/src/rlm_tests.rs
@@ -46,12 +46,18 @@ async fn rlm_send_executes_repl_steps_and_persists_final_answer() {
                 },
             }],
             usage: None,
+            model: None,
+            ttft: None,
+            duration: None,
         },
         ModelResponse {
             response_id: Some(Uuid7::now()),
             messages: vec![assistant_message("FINAL(done)")],
             tool_calls: Vec::new(),
             usage: None,
+            model: None,
+            ttft: None,
+            duration: None,
         },
     ]));
     let harness = RlmHarness::new(exoharness, model);
@@ -143,6 +149,9 @@ async fn rlm_subquery_variable_can_store_final_answer() {
                 },
             }],
             usage: None,
+            model: None,
+            ttft: None,
+            duration: None,
         },
         ModelResponse {
             response_id: Some(Uuid7::now()),
@@ -168,18 +177,27 @@ async fn rlm_subquery_variable_can_store_final_answer() {
                 },
             }],
             usage: None,
+            model: None,
+            ttft: None,
+            duration: None,
         },
         ModelResponse {
             response_id: Some(Uuid7::now()),
             messages: vec![assistant_message("4")],
             tool_calls: Vec::new(),
             usage: None,
+            model: None,
+            ttft: None,
+            duration: None,
         },
         ModelResponse {
             response_id: Some(Uuid7::now()),
             messages: vec![assistant_message("FINAL_VAR(final_answer)")],
             tool_calls: Vec::new(),
             usage: None,
+            model: None,
+            ttft: None,
+            duration: None,
         },
     ]));
     let harness = RlmHarness::new(exoharness, Arc::clone(&model));
@@ -246,6 +264,9 @@ async fn rlm_send_stream_suppresses_internal_control_text() {
             messages: vec![assistant_message("FINAL(2)")],
             tool_calls: Vec::new(),
             usage: None,
+            model: None,
+            ttft: None,
+            duration: None,
         },
     }]));
     let harness = RlmHarness::new(exoharness, model);
@@ -315,6 +336,9 @@ async fn rlm_exposes_history_via_get_messages() {
             messages: vec![assistant_message("FINAL(recorded)")],
             tool_calls: Vec::new(),
             usage: None,
+            model: None,
+            ttft: None,
+            duration: None,
         },
         ModelResponse {
             response_id: Some(Uuid7::now()),
@@ -343,12 +367,18 @@ globalThis.answer = String(\n\
                 },
             }],
             usage: None,
+            model: None,
+            ttft: None,
+            duration: None,
         },
         ModelResponse {
             response_id: Some(Uuid7::now()),
             messages: vec![assistant_message("FINAL_VAR(answer)")],
             tool_calls: Vec::new(),
             usage: None,
+            model: None,
+            ttft: None,
+            duration: None,
         },
     ]));
     let harness = RlmHarness::new(exoharness, model);
@@ -421,6 +451,9 @@ async fn rlm_can_finish_by_setting_final_in_repl() {
             },
         }],
         usage: None,
+        model: None,
+        ttft: None,
+        duration: None,
     }]));
     let harness = RlmHarness::new(exoharness, Arc::clone(&model));
     register_test_model(harness.exoharness_handle().as_ref()).await;
diff --git a/crates/exoharness/Cargo.toml b/crates/exoharness/Cargo.toml
index 251462f..a935611 100644
--- a/crates/exoharness/Cargo.toml
+++ b/crates/exoharness/Cargo.toml
@@ -20,12 +20,6 @@ basic-backend = [
   "dep:url",
 ]
 apple-keychain = ["dep:keyring", "dep:keyring-core"]
-aws-agentcore = [
-  "basic-backend",
-  "dep:aws-config",
-  "dep:aws-credential-types",
-  "dep:aws-sdk-bedrockagentcore",
-]
 contract-tests = []
 
 [dependencies]
@@ -33,9 +27,6 @@ aes-gcm = { version = "0.11.0-rc.3", optional = true }
 actix-web = { workspace = true, optional = true }
 anyhow.workspace = true
 async-trait.workspace = true
-aws-config = { workspace = true, optional = true }
-aws-credential-types = { workspace = true, optional = true }
-aws-sdk-bedrockagentcore = { workspace = true, optional = true }
 base64 = { version = "0.22.1", optional = true }
 bytes.workspace = true
 chrono.workspace = true
diff --git a/crates/exoharness/src/basic.rs b/crates/exoharness/src/basic.rs
index 3e38a9b..d33210f 100644
--- a/crates/exoharness/src/basic.rs
+++ b/crates/exoharness/src/basic.rs
@@ -36,14 +36,13 @@ use crate::{
     BoxAsyncRead, BoxAsyncWrite, CancelSandboxProcessRequest, CloseSandboxProcessInputRequest,
     ConversationHandle, ConversationId, ConversationRecord, CreateSandboxRequest, Event, EventData,
     EventId, EventKind, EventQuery, EventQueryDirection, EventStream, ExoHarness, FileSystemMount,
-    ForkConversationRequest, GetEventsResult, GetSandboxProcessEventsResult,
-    ListConversationsRequest, ListConversationsResult, NewAgentRequest, NewConversationRequest,
-    PutSecretRequest, ReadArtifactRequest, Result, RunInSandboxRequest, SandboxId, SandboxProcess,
-    SandboxProcessEvent, SandboxProcessEventQuery, SandboxProcessId, SandboxProcessMode,
-    SandboxProcessParts, SandboxProcessRecord, SandboxProcessStatus, SandboxProcessStdin,
-    SandboxProvider, SandboxProviderConfig, Secret, SecretId, SecretMetadata, SecretType,
-    SessionId, SnapshotId, StartSandboxProcessRequest, StartSandboxRequest, TurnHandle, TurnId,
-    TurnRecord, Uuid7, WaitSandboxProcessRequest, WriteArtifactRequest,
+    ForkConversationRequest, GetEventsResult, GetSandboxProcessEventsResult, NewAgentRequest,
+    NewConversationRequest, PutSecretRequest, ReadArtifactRequest, Result, RunInSandboxRequest,
+    SandboxId, SandboxProcess, SandboxProcessEvent, SandboxProcessEventQuery, SandboxProcessId,
+    SandboxProcessMode, SandboxProcessParts, SandboxProcessRecord, SandboxProcessStatus,
+    SandboxProcessStdin, SandboxProvider, SandboxProviderConfig, Secret, SecretId, SecretMetadata,
+    SecretType, SessionId, SnapshotId, StartSandboxProcessRequest, StartSandboxRequest, TurnHandle,
+    TurnId, TurnRecord, Uuid7, WaitSandboxProcessRequest, WriteArtifactRequest,
     WriteSandboxProcessInputRequest,
 };
 
@@ -66,7 +65,6 @@ pub enum SandboxBackendChoice {
     LocalProcess,
     Daytona(DaytonaBackendSpec),
     Vercel(VercelBackendSpec),
-    AwsAgentCore,
 }
 
 impl SandboxBackendChoice {
@@ -77,7 +75,6 @@ impl SandboxBackendChoice {
             Self::LocalProcess => SandboxProvider::LocalProcess,
             Self::Daytona(_) => SandboxProvider::Daytona,
             Self::Vercel(_) => SandboxProvider::Vercel,
-            Self::AwsAgentCore => SandboxProvider::AwsAgentCore,
         }
     }
 }
@@ -208,23 +205,6 @@ impl BasicExoHarnessInner {
                 };
                 Arc::new(crate::VercelSandboxBackend::new(config)?)
             }
-            SandboxBackendChoice::AwsAgentCore => {
-                #[cfg(feature = "aws-agentcore")]
-                {
-                    let config = self.aws_agentcore_config_from_binding().await?.ok_or_else(|| {
-                        anyhow!(
-                            "aws-agentcore sandbox requested but no sandbox provider binding is configured; run `exo provider configure --provider aws-agentcore --runtime-arn <arn>`"
-                        )
-                    })?;
-                    Arc::new(crate::AwsAgentCoreSandboxBackend::new(config).await?)
-                }
-                #[cfg(not(feature = "aws-agentcore"))]
-                {
-                    bail!(
-                        "aws-agentcore sandbox backend requires building exoharness with the aws-agentcore feature"
-                    );
-                }
-            }
         };
         Ok(backend)
     }
@@ -418,38 +398,6 @@ impl BasicExoHarnessInner {
         }))
     }
 
-    #[cfg(feature = "aws-agentcore")]
-    async fn aws_agentcore_config_from_binding(&self) -> Result<Option<crate::AwsAgentCoreConfig>> {
-        let bindings = list_binding_records(&self.storage, Path::new("bindings")).await?;
-        let Some((runtime_arn, region, qualifier, endpoint_url)) = bindings
-            .into_iter()
-            .rev()
-            .find_map(|record| match record.binding {
-                Binding::Sandbox {
-                    config:
-                        SandboxProviderConfig::AwsAgentCore {
-                            runtime_arn,
-                            region,
-                            qualifier,
-                            endpoint_url,
-                            ..
-                        },
-                    ..
-                } => Some((runtime_arn, region, qualifier, endpoint_url)),
-                _ => None,
-            })
-        else {
-            return Ok(None);
-        };
-        Ok(Some(crate::AwsAgentCoreConfig {
-            runtime_arn,
-            region,
-            qualifier,
-            endpoint_url,
-            credentials: aws_agentcore_credentials_from_env(),
-        }))
-    }
-
     /// The configured default base image for `provider`, from the newest
     /// `Binding::Sandbox` for it. `None` when no such binding exists, so the
     /// backend applies its own intrinsic default.
@@ -464,25 +412,6 @@ impl BasicExoHarnessInner {
     }
 }
 
-#[cfg(feature = "aws-agentcore")]
-fn aws_agentcore_credentials_from_env() -> Option<crate::AwsAgentCoreCredentials> {
-    let access_key_id = std::env::var("AWS_AGENTCORE_ACCESS_KEY_ID")
-        .ok()
-        .filter(|value| !value.trim().is_empty())?;
-    let secret_access_key = std::env::var("AWS_AGENTCORE_SECRET_ACCESS_KEY")
-        .ok()
-        .filter(|value| !value.trim().is_empty())?;
-    let session_token = std::env::var("AWS_AGENTCORE_SESSION_TOKEN")
-        .ok()
-        .filter(|value| !value.trim().is_empty());
-
-    Some(crate::AwsAgentCoreCredentials {
-        access_key_id,
-        secret_access_key,
-        session_token,
-    })
-}
-
 impl BasicExoHarness {
     pub async fn new(config: BasicExoHarnessConfig) -> Result<Self> {
         Self::new_with_backend(config, None).await
@@ -716,23 +645,16 @@ impl AgentHandle for BasicAgentHandle {
         &self.record
     }
 
-    async fn list_conversations(
-        &self,
-        request: ListConversationsRequest,
-    ) -> Result<ListConversationsResult<Arc<dyn ConversationHandle>>> {
+    async fn list_conversations(&self) -> Result<Vec<Arc<dyn ConversationHandle>>> {
         let mut handles: Vec<Arc<dyn ConversationHandle>> = Vec::new();
-        let result = self.list_conversation_records(request).await?;
-        for record in result.conversations {
+        for record in self.list_conversation_records().await? {
             handles.push(Arc::new(BasicConversationHandle {
                 harness: self.harness.clone(),
                 agent_id: self.record.id,
                 record,
             }));
         }
-        Ok(ListConversationsResult {
-            conversations: handles,
-            next_cursor: result.next_cursor,
-        })
+        Ok(handles)
     }
 
     async fn get_conversation(
@@ -764,10 +686,7 @@ impl AgentHandle for BasicAgentHandle {
         request: NewConversationRequest,
     ) -> Result<Arc<dyn ConversationHandle>> {
         let _guard = self.harness.inner.write_lock.lock().await;
-        let existing = self
-            .list_conversation_records(ListConversationsRequest::default())
-            .await?
-            .conversations;
+        let existing = self.list_conversation_records().await?;
         let slug = match request.slug {
             Some(slug) => {
                 if existing
@@ -993,10 +912,7 @@ impl BasicAgentHandle {
         self.agent_dir().join("artifacts")
     }
 
-    async fn list_conversation_records(
-        &self,
-        request: ListConversationsRequest,
-    ) -> Result<ListConversationsResult<ConversationRecord>> {
+    async fn list_conversation_records(&self) -> Result<Vec<ConversationRecord>> {
         let mut conversations = Vec::new();
         for key in self
             .harness
@@ -1016,48 +932,11 @@ impl BasicAgentHandle {
                     .await?,
             );
         }
-        conversations.sort_by_key(conversation_recency_key);
-        conversations.reverse();
-        paginate_conversation_records(conversations, request)
+        conversations.sort_by_key(|record| record.id);
+        Ok(conversations)
     }
 }
 
-fn conversation_recency_key(record: &ConversationRecord) -> Uuid7 {
-    record.latest_event_id.unwrap_or(record.id)
-}
-
-fn paginate_conversation_records(
-    conversations: Vec<ConversationRecord>,
-    request: ListConversationsRequest,
-) -> Result<ListConversationsResult<ConversationRecord>> {
-    let start = match request.cursor {
-        Some(cursor) => conversations
-            .iter()
-            .position(|conversation| conversation_recency_key(conversation) == cursor)
-            .map(|index| index + 1)
-            .ok_or_else(|| anyhow!("conversation cursor not found: {cursor}"))?,
-        None => 0,
-    };
-    let remaining = conversations.len().saturating_sub(start);
-    let Some(limit) = request.limit.filter(|limit| *limit > 0) else {
-        return Ok(ListConversationsResult {
-            conversations: conversations.into_iter().skip(start).collect(),
-            next_cursor: None,
-        });
-    };
-    let has_more = remaining > limit;
-    let page: Vec<_> = conversations.into_iter().skip(start).take(limit).collect();
-    let next_cursor = if has_more {
-        page.last().map(conversation_recency_key)
-    } else {
-        None
-    };
-    Ok(ListConversationsResult {
-        conversations: page,
-        next_cursor,
-    })
-}
-
 struct BasicConversationHandle {
     harness: BasicExoHarness,
     agent_id: AgentId,
@@ -1313,6 +1192,7 @@ impl ConversationHandle for BasicConversationHandle {
             events_to_append.push(EventData::Messages {
                 messages: request.input,
                 response_id: None,
+                usage: None,
             });
         }
 
@@ -1462,10 +1342,7 @@ impl ConversationHandle for BasicConversationHandle {
                 .get_json::<AgentRecord>(self.agent_dir().join("record.json"))
                 .await?,
         };
-        let existing = agent
-            .list_conversation_records(ListConversationsRequest::default())
-            .await?
-            .conversations;
+        let existing = agent.list_conversation_records().await?;
         let slug = match request.slug {
             Some(slug) => {
                 if existing
diff --git a/crates/exoharness/src/basic_tests.rs b/crates/exoharness/src/basic_tests.rs
index 36cd91d..18b38c4 100644
--- a/crates/exoharness/src/basic_tests.rs
+++ b/crates/exoharness/src/basic_tests.rs
@@ -40,17 +40,6 @@ async fn basic_backend_supports_agent_and_conversation_crud() {
     crate::contract_tests::supports_agent_and_conversation_crud(harness).await;
 }
 
-#[tokio::test(flavor = "current_thread")]
-async fn basic_backend_lists_conversations_recent_first_and_paginates() {
-    let tempdir = TempDir::new().expect("tempdir");
-    let harness: std::sync::Arc<dyn ExoHarness> = std::sync::Arc::new(
-        BasicExoHarness::new(local_test_config(tempdir.path()))
-            .await
-            .expect("harness should initialize"),
-    );
-    crate::contract_tests::list_conversations_returns_recent_first_and_paginates(harness).await;
-}
-
 #[tokio::test(flavor = "current_thread")]
 async fn basic_backend_contract_begin_turn_tracks_events_through_finish() {
     let tempdir = TempDir::new().expect("tempdir");
@@ -313,6 +302,7 @@ async fn turn_events_continue_after_artifact_writes() {
     turn.add_events(vec![EventData::Messages {
         messages: vec![assistant_message("pong")],
         response_id: None,
+        usage: None,
     }])
     .await
     .expect("append after artifact write");
diff --git a/crates/exoharness/src/contract_tests.rs b/crates/exoharness/src/contract_tests.rs
index a2f9783..897e97e 100644
--- a/crates/exoharness/src/contract_tests.rs
+++ b/crates/exoharness/src/contract_tests.rs
@@ -8,10 +8,9 @@ use lingua::universal::{AssistantContent, UserContent};
 use tokio::time::timeout;
 
 use crate::{
-    AddEventsRequest, BeginTurnRequest, Binding, EventData, EventKind, EventQuery,
-    EventQueryDirection, ExoHarness, ForkConversationRequest, ListConversationsRequest,
-    ManagedSandboxHandle, NewAgentRequest, NewConversationRequest, SandboxCommand, Uuid7,
-    WriteArtifactRequest,
+    BeginTurnRequest, Binding, EventData, EventKind, EventQuery, EventQueryDirection, ExoHarness,
+    ForkConversationRequest, ManagedSandboxHandle, NewAgentRequest, NewConversationRequest,
+    SandboxCommand, Uuid7, WriteArtifactRequest,
 };
 
 pub async fn supports_agent_and_conversation_crud(harness: Arc<dyn ExoHarness>) {
@@ -52,10 +51,9 @@ pub async fn supports_agent_and_conversation_crud(harness: Arc<dyn ExoHarness>)
     );
     assert!(
         agent
-            .list_conversations(crate::ListConversationsRequest::default())
+            .list_conversations()
             .await
             .expect("list conversations")
-            .conversations
             .iter()
             .any(|candidate| candidate.record().id == conversation.record().id)
     );
@@ -74,85 +72,6 @@ pub async fn supports_agent_and_conversation_crud(harness: Arc<dyn ExoHarness>)
     );
 }
 
-pub async fn list_conversations_returns_recent_first_and_paginates(harness: Arc<dyn ExoHarness>) {
-    let agent = harness
-        .new_agent(NewAgentRequest {
-            slug: unique_slug("agent"),
-            name: "Agent".to_string(),
-        })
-        .await
-        .expect("agent should be created");
-    let first = agent
-        .new_conversation(NewConversationRequest {
-            slug: Some(unique_slug("first")),
-            name: Some("First".to_string()),
-        })
-        .await
-        .expect("first conversation");
-    tokio::time::sleep(Duration::from_millis(2)).await;
-    let second = agent
-        .new_conversation(NewConversationRequest {
-            slug: Some(unique_slug("second")),
-            name: Some("Second".to_string()),
-        })
-        .await
-        .expect("second conversation");
-    tokio::time::sleep(Duration::from_millis(2)).await;
-    let third = agent
-        .new_conversation(NewConversationRequest {
-            slug: Some(unique_slug("third")),
-            name: Some("Third".to_string()),
-        })
-        .await
-        .expect("third conversation");
-    tokio::time::sleep(Duration::from_millis(2)).await;
-    first
-        .add_events(AddEventsRequest {
-            session_id: None,
-            turn_id: None,
-            expected_head: None,
-            data: vec![EventData::Custom {
-                event_type: "touch".to_string(),
-                payload: serde_json::Value::Null,
-            }],
-        })
-        .await
-        .expect("touch first conversation");
-
-    let page = agent
-        .list_conversations(ListConversationsRequest {
-            cursor: None,
-            limit: Some(2),
-        })
-        .await
-        .expect("first page");
-    let page_ids: Vec<_> = page
-        .conversations
-        .iter()
-        .map(|conversation| conversation.record().id)
-        .collect();
-    assert_eq!(page_ids, vec![first.record().id, third.record().id]);
-    assert_eq!(
-        page.next_cursor,
-        Some(third.record().latest_event_id.unwrap_or(third.record().id))
-    );
-
-    let next_page = agent
-        .list_conversations(ListConversationsRequest {
-            cursor: page.next_cursor,
-            limit: Some(2),
-        })
-        .await
-        .expect("second page");
-    let next_page_ids: Vec<_> = next_page
-        .conversations
-        .iter()
-        .map(|conversation| conversation.record().id)
-        .collect();
-    assert_eq!(next_page_ids, vec![second.record().id]);
-    assert_eq!(next_page.next_cursor, None);
-}
-
 pub async fn begin_turn_tracks_events_through_finish(harness: Arc<dyn ExoHarness>) {
     let agent = harness
         .new_agent(NewAgentRequest {
@@ -176,6 +95,7 @@ pub async fn begin_turn_tracks_events_through_finish(harness: Arc<dyn ExoHarness
     turn.add_events(vec![EventData::Messages {
         messages: vec![assistant_message("pong")],
         response_id: None,
+        usage: None,
     }])
     .await
     .expect("append assistant message");
@@ -248,6 +168,7 @@ pub async fn turn_events_continue_after_artifact_writes(harness: Arc<dyn ExoHarn
     turn.add_events(vec![EventData::Messages {
         messages: vec![assistant_message("pong")],
         response_id: None,
+        usage: None,
     }])
     .await
     .expect("append after artifact write");
diff --git a/crates/exoharness/src/http/client.rs b/crates/exoharness/src/http/client.rs
index 97ebcaa..735f4cd 100644
--- a/crates/exoharness/src/http/client.rs
+++ b/crates/exoharness/src/http/client.rs
@@ -20,12 +20,12 @@ use crate::{
     CancelSandboxProcessRequest, CloseSandboxProcessInputRequest, ConversationHandle,
     ConversationId, ConversationRecord, CreateSandboxRequest, Event, EventData, EventId,
     EventQuery, EventStream, ExoHarness, ForkConversationRequest, GetEventsResult,
-    GetSandboxProcessEventsResult, ListConversationsRequest, ListConversationsResult,
-    NewAgentRequest, NewConversationRequest, PutSecretRequest, ReadArtifactRequest, Result,
-    RunInSandboxRequest, SandboxId, SandboxProcess, SandboxProcessEventQuery, SandboxProcessParts,
-    SandboxProcessRecord, SandboxProcessStatus, Secret, SecretId, SecretMetadata, SessionId,
-    SnapshotId, StartSandboxProcessRequest, StartSandboxRequest, TurnHandle, TurnRecord,
-    WaitSandboxProcessRequest, WriteArtifactRequest, WriteSandboxProcessInputRequest,
+    GetSandboxProcessEventsResult, NewAgentRequest, NewConversationRequest, PutSecretRequest,
+    ReadArtifactRequest, Result, RunInSandboxRequest, SandboxId, SandboxProcess,
+    SandboxProcessEventQuery, SandboxProcessParts, SandboxProcessRecord, SandboxProcessStatus,
+    Secret, SecretId, SecretMetadata, SessionId, SnapshotId, StartSandboxProcessRequest,
+    StartSandboxRequest, TurnHandle, TurnRecord, WaitSandboxProcessRequest, WriteArtifactRequest,
+    WriteSandboxProcessInputRequest,
 };
 
 #[derive(Clone)]
@@ -199,31 +199,23 @@ impl AgentHandle for HttpAgentHandle {
         &self.record
     }
 
-    async fn list_conversations(
-        &self,
-        request: ListConversationsRequest,
-    ) -> Result<ListConversationsResult<Arc<dyn ConversationHandle>>> {
+    async fn list_conversations(&self) -> Result<Vec<Arc<dyn ConversationHandle>>> {
         match self
             .harness
             .request(Request::ListConversations {
                 agent_id: self.record.id,
-                request,
             })
             .await?
         {
-            Response::Conversations { result } => Ok(ListConversationsResult {
-                conversations: result
-                    .conversations
-                    .into_iter()
-                    .map(|conversation| {
-                        Arc::new(HttpConversationHandle::new(
-                            self.harness.clone(),
-                            conversation,
-                        )) as _
-                    })
-                    .collect(),
-                next_cursor: result.next_cursor,
-            }),
+            Response::Conversations { conversations } => Ok(conversations
+                .into_iter()
+                .map(|conversation| {
+                    Arc::new(HttpConversationHandle::new(
+                        self.harness.clone(),
+                        conversation,
+                    )) as _
+                })
+                .collect()),
             response => unexpected_response(response, "conversations"),
         }
     }
diff --git a/crates/exoharness/src/http_tests.rs b/crates/exoharness/src/http_tests.rs
index 8312d8e..59dae77 100644
--- a/crates/exoharness/src/http_tests.rs
+++ b/crates/exoharness/src/http_tests.rs
@@ -48,15 +48,6 @@ async fn http_exoharness_supports_agent_and_conversation_crud() {
     crate::contract_tests::supports_agent_and_conversation_crud(Arc::clone(&fixture.harness)).await;
 }
 
-#[actix_web::test]
-async fn http_exoharness_lists_conversations_recent_first_and_paginates() {
-    let fixture = http_harness().await;
-    crate::contract_tests::list_conversations_returns_recent_first_and_paginates(Arc::clone(
-        &fixture.harness,
-    ))
-    .await;
-}
-
 #[actix_web::test]
 async fn http_exoharness_begin_turn_tracks_events_through_finish() {
     let fixture = http_harness().await;
diff --git a/crates/exoharness/src/protocol.rs b/crates/exoharness/src/protocol.rs
index ef53608..40f8bc7 100644
--- a/crates/exoharness/src/protocol.rs
+++ b/crates/exoharness/src/protocol.rs
@@ -5,11 +5,11 @@ use crate::{
     BeginTurnRequest, Binding, BindingId, BindingRecord, CancelSandboxProcessRequest,
     CloseSandboxProcessInputRequest, ConversationId, ConversationRecord, CreateSandboxRequest,
     Event, EventData, EventId, EventQuery, ForkConversationRequest, GetEventsResult,
-    GetSandboxProcessEventsResult, ListConversationsRequest, ListConversationsResult,
-    NewAgentRequest, NewConversationRequest, PutSecretRequest, ReadArtifactRequest, SandboxId,
-    SandboxProcessEventQuery, SandboxProcessRecord, SandboxProcessStatus, Secret, SecretId,
-    SecretMetadata, SessionId, SnapshotId, StartSandboxProcessRequest, StartSandboxRequest, TurnId,
-    TurnRecord, WaitSandboxProcessRequest, WriteArtifactRequest, WriteSandboxProcessInputRequest,
+    GetSandboxProcessEventsResult, NewAgentRequest, NewConversationRequest, PutSecretRequest,
+    ReadArtifactRequest, SandboxId, SandboxProcessEventQuery, SandboxProcessRecord,
+    SandboxProcessStatus, Secret, SecretId, SecretMetadata, SessionId, SnapshotId,
+    StartSandboxProcessRequest, StartSandboxRequest, TurnId, TurnRecord, WaitSandboxProcessRequest,
+    WriteArtifactRequest, WriteSandboxProcessInputRequest,
 };
 
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
@@ -70,7 +70,6 @@ pub enum Request {
     },
     ListConversations {
         agent_id: AgentId,
-        request: ListConversationsRequest,
     },
     GetConversation {
         agent_id: AgentId,
@@ -343,7 +342,7 @@ pub enum Response {
         value: bool,
     },
     Conversations {
-        result: ListConversationsResult<ConversationHandleInfo>,
+        conversations: Vec<ConversationHandleInfo>,
     },
     Conversation {
         conversation: Option<ConversationHandleInfo>,
diff --git a/crates/exoharness/src/sandbox_provider/aws_agentcore.rs b/crates/exoharness/src/sandbox_provider/aws_agentcore.rs
deleted file mode 100644
index dfee119..0000000
--- a/crates/exoharness/src/sandbox_provider/aws_agentcore.rs
+++ /dev/null
@@ -1,421 +0,0 @@
-use std::sync::Arc;
-
-use anyhow::{Context, Result, bail};
-use async_trait::async_trait;
-use aws_config::{BehaviorVersion, Region};
-use aws_credential_types::Credentials;
-use aws_sdk_bedrockagentcore::Client;
-use aws_sdk_bedrockagentcore::primitives::Blob;
-use serde::{Deserialize, Serialize};
-
-use crate::sandbox::{
-    ManagedSandboxBackend, ManagedSandboxHandle, SandboxCommand, SandboxCommandOutput,
-    SandboxNetworkPolicy, SandboxRequest, SandboxSpec, SnapshotPayload, sandbox_spec_hash,
-};
-use crate::sandbox_provider::process_bridge;
-
-pub fn default_aws_agentcore_image() -> String {
-    String::new()
-}
-
-#[derive(Debug, Clone)]
-pub struct AwsAgentCoreConfig {
-    pub runtime_arn: String,
-    pub region: String,
-    pub qualifier: Option<String>,
-    pub endpoint_url: Option<String>,
-    pub credentials: Option<AwsAgentCoreCredentials>,
-}
-
-#[derive(Debug, Clone)]
-pub struct AwsAgentCoreCredentials {
-    pub access_key_id: String,
-    pub secret_access_key: String,
-    pub session_token: Option<String>,
-}
-
-pub struct AwsAgentCoreSandboxBackend {
-    client: Client,
-    runtime_arn: String,
-    invoke_target: AwsAgentCoreInvokeTarget,
-    qualifier: Option<String>,
-}
-
-impl AwsAgentCoreSandboxBackend {
-    pub async fn new(config: AwsAgentCoreConfig) -> Result<Self> {
-        if config.runtime_arn.trim().is_empty() {
-            bail!("AgentCore runtime ARN must not be empty");
-        }
-        if config.region.trim().is_empty() {
-            bail!("AgentCore region must not be empty");
-        }
-        let region = config.region;
-        let mut sdk_config_loader =
-            aws_config::defaults(BehaviorVersion::latest()).region(Region::new(region.clone()));
-        if let Some(credentials) = config.credentials {
-            sdk_config_loader = sdk_config_loader.credentials_provider(Credentials::new(
-                credentials.access_key_id,
-                credentials.secret_access_key,
-                credentials.session_token,
-                None,
-                "aws-agentcore",
-            ));
-        }
-        let sdk_config = sdk_config_loader.load().await;
-        let mut service_config_builder =
-            aws_sdk_bedrockagentcore::config::Builder::from(&sdk_config);
-        if let Some(endpoint_url) = config.endpoint_url {
-            service_config_builder = service_config_builder.endpoint_url(endpoint_url);
-        } else {
-            service_config_builder.set_endpoint_url(None);
-        }
-        let service_config = service_config_builder.build();
-        let invoke_target = agentcore_invoke_target(&config.runtime_arn)?;
-        Ok(Self {
-            client: Client::from_conf(service_config),
-            runtime_arn: config.runtime_arn,
-            invoke_target,
-            qualifier: config.qualifier,
-        })
-    }
-}
-
-#[async_trait]
-impl ManagedSandboxBackend for AwsAgentCoreSandboxBackend {
-    async fn acquire(&self, request: SandboxRequest) -> Result<Arc<dyn ManagedSandboxHandle>> {
-        reject_unsupported_request(&request)?;
-        let spec_hash = sandbox_spec_hash(&request.spec);
-        let runtime_session_id = agentcore_runtime_session_id(&request, &spec_hash);
-        Ok(Arc::new(AwsAgentCoreSandboxHandle {
-            id: format!("aws-agentcore:{runtime_session_id}"),
-            runtime_session_id,
-            request,
-            backend: AwsAgentCoreBackendHandle {
-                client: self.client.clone(),
-                runtime_arn: self.runtime_arn.clone(),
-                invoke_target: self.invoke_target.clone(),
-                qualifier: self.qualifier.clone(),
-            },
-        }))
-    }
-
-    async fn acquire_from_snapshot(
-        &self,
-        _request: SandboxRequest,
-        _payload: SnapshotPayload,
-    ) -> Result<Arc<dyn ManagedSandboxHandle>> {
-        bail!("restoring an AgentCore sandbox from a snapshot is not implemented yet");
-    }
-}
-
-#[derive(Clone)]
-struct AwsAgentCoreBackendHandle {
-    client: Client,
-    runtime_arn: String,
-    invoke_target: AwsAgentCoreInvokeTarget,
-    qualifier: Option<String>,
-}
-
-#[derive(Clone)]
-struct AwsAgentCoreInvokeTarget {
-    runtime_identifier: String,
-    account_id: String,
-}
-
-struct AwsAgentCoreSandboxHandle {
-    id: String,
-    runtime_session_id: String,
-    request: SandboxRequest,
-    backend: AwsAgentCoreBackendHandle,
-}
-
-#[async_trait]
-impl ManagedSandboxHandle for AwsAgentCoreSandboxHandle {
-    fn id(&self) -> &str {
-        &self.id
-    }
-
-    async fn exec(&self, command: &SandboxCommand) -> Result<SandboxCommandOutput> {
-        exec_in_agentcore(
-            &self.backend,
-            &self.runtime_session_id,
-            &self.request.spec,
-            command,
-        )
-        .await
-    }
-
-    async fn start_process(&self, command: &SandboxCommand) -> Result<crate::SandboxProcessParts> {
-        start_process_in_agentcore(
-            &self.backend,
-            &self.runtime_session_id,
-            &self.request.spec,
-            command,
-        )
-        .await
-    }
-
-    async fn stop(&self) -> Result<()> {
-        let mut request = self
-            .backend
-            .client
-            .stop_runtime_session()
-            .agent_runtime_arn(self.backend.runtime_arn.clone())
-            .runtime_session_id(self.runtime_session_id.clone());
-        if let Some(qualifier) = &self.backend.qualifier {
-            request = request.qualifier(qualifier.clone());
-        }
-        request.send().await.with_context(|| {
-            format!(
-                "stopping AgentCore runtime session {}",
-                self.runtime_session_id
-            )
-        })?;
-        Ok(())
-    }
-
-    async fn snapshot(&self) -> Result<SnapshotPayload> {
-        bail!("AgentCore sandbox snapshots are not implemented yet");
-    }
-}
-
-async fn exec_in_agentcore(
-    backend: &AwsAgentCoreBackendHandle,
-    runtime_session_id: &str,
-    spec: &SandboxSpec,
-    command: &SandboxCommand,
-) -> Result<SandboxCommandOutput> {
-    if command.argv.is_empty() {
-        bail!("sandbox command requires at least one argv entry");
-    }
-    let cwd = command
-        .cwd
-        .clone()
-        .unwrap_or_else(|| spec.default_workdir.clone());
-    let body = AgentCoreExecRequest {
-        request_type: "exec",
-        argv: command.argv.clone(),
-        env: command.env.clone(),
-        cwd: cwd.clone(),
-        timeout_ms: command.timeout.map(duration_to_millis),
-    };
-    let response: AgentCoreExecResponse =
-        invoke_agentcore_json(backend, runtime_session_id, &body).await?;
-    if let Some(error) = response.error {
-        bail!("AgentCore exec failed: {error}");
-    }
-    let exit_code = response
-        .exit_code
-        .context("AgentCore exec response did not include exit_code")?;
-    let ok = response.ok.unwrap_or(exit_code == 0);
-    Ok(SandboxCommandOutput {
-        ok,
-        exit_code: Some(exit_code),
-        stdout: response.stdout.unwrap_or_default(),
-        stderr: response.stderr.unwrap_or_default(),
-        command: command.argv.clone(),
-        cwd: response.cwd.unwrap_or(cwd),
-    })
-}
-
-async fn start_process_in_agentcore(
-    backend: &AwsAgentCoreBackendHandle,
-    runtime_session_id: &str,
-    spec: &SandboxSpec,
-    command: &SandboxCommand,
-) -> Result<crate::SandboxProcessParts> {
-    if command.argv.is_empty() {
-        bail!("sandbox command requires at least one argv entry");
-    }
-    let cwd = command
-        .cwd
-        .clone()
-        .unwrap_or_else(|| spec.default_workdir.clone());
-    let response: AgentCoreStartProcessResponse = invoke_agentcore_json(
-        backend,
-        runtime_session_id,
-        &AgentCoreStartProcessRequest {
-            request_type: "start_process",
-            argv: command.argv.clone(),
-            env: command.env.clone(),
-            cwd,
-        },
-    )
-    .await?;
-    let client = AwsAgentCoreProcessBridgeClient {
-        backend: backend.clone(),
-        runtime_session_id: runtime_session_id.to_string(),
-        process_id: response.process_id,
-    };
-    Ok(process_bridge::process_parts(Arc::new(client)))
-}
-
-struct AwsAgentCoreProcessBridgeClient {
-    backend: AwsAgentCoreBackendHandle,
-    runtime_session_id: String,
-    process_id: String,
-}
-
-#[async_trait]
-impl process_bridge::Client for AwsAgentCoreProcessBridgeClient {
-    async fn request(&self, request: process_bridge::Request) -> Result<process_bridge::Response> {
-        invoke_agentcore_json(
-            &self.backend,
-            &self.runtime_session_id,
-            &AgentCoreProcessBridgeRequest {
-                request_type: "process_bridge",
-                process_id: self.process_id.clone(),
-                request,
-            },
-        )
-        .await
-    }
-}
-
-async fn invoke_agentcore_json<Request, Response>(
-    backend: &AwsAgentCoreBackendHandle,
-    runtime_session_id: &str,
-    body: &Request,
-) -> Result<Response>
-where
-    Request: Serialize,
-    Response: for<'de> Deserialize<'de>,
-{
-    let payload = serde_json::to_vec(body).context("serializing AgentCore runtime request")?;
-    let mut request = backend
-        .client
-        .invoke_agent_runtime()
-        .agent_runtime_arn(backend.invoke_target.runtime_identifier.clone())
-        .account_id(backend.invoke_target.account_id.clone())
-        .runtime_session_id(runtime_session_id.to_string())
-        .content_type("application/json")
-        .accept("application/json")
-        .payload(Blob::new(payload));
-    if let Some(qualifier) = &backend.qualifier {
-        request = request.qualifier(qualifier.clone());
-    }
-    let output = request.send().await.with_context(|| {
-        format!(
-            "invoking AgentCore runtime session {runtime_session_id} with runtime {} in account {}",
-            backend.invoke_target.runtime_identifier, backend.invoke_target.account_id,
-        )
-    })?;
-    let status_code = output.status_code.unwrap_or(200);
-    let bytes = output
-        .response
-        .collect()
-        .await
-        .context("reading AgentCore runtime response body")?
-        .into_bytes();
-    if !(200..300).contains(&status_code) {
-        let text = String::from_utf8_lossy(&bytes);
-        bail!("AgentCore runtime returned status {status_code}: {text}");
-    }
-    serde_json::from_slice(&bytes).with_context(|| {
-        let text = String::from_utf8_lossy(&bytes);
-        format!("decoding AgentCore runtime JSON response: {text}")
-    })
-}
-
-fn agentcore_invoke_target(runtime_arn: &str) -> Result<AwsAgentCoreInvokeTarget> {
-    let mut parts = runtime_arn.splitn(6, ':');
-    let arn = parts.next();
-    let _partition = parts.next();
-    let service = parts.next();
-    let _region = parts.next();
-    let account_id = parts.next();
-    let resource = parts.next();
-    let runtime_identifier = resource.and_then(|resource| resource.strip_prefix("runtime/"));
-    match (arn, service, account_id, runtime_identifier) {
-        (Some("arn"), Some("bedrock-agentcore"), Some(account_id), Some(runtime_identifier))
-            if !account_id.is_empty() && !runtime_identifier.is_empty() =>
-        {
-            Ok(AwsAgentCoreInvokeTarget {
-                runtime_identifier: runtime_identifier.to_string(),
-                account_id: account_id.to_string(),
-            })
-        }
-        _ => bail!(
-            "AgentCore runtime ARN must have the form arn:...:bedrock-agentcore:...:<account-id>:runtime/<runtime-id>"
-        ),
-    }
-}
-
-fn reject_unsupported_request(request: &SandboxRequest) -> Result<()> {
-    if !request.spec.mounts.is_empty() {
-        bail!(
-            "AgentCore sandbox backend does not support host bind-mounts; remove conversation mounts or use a local sandbox provider"
-        );
-    }
-    if matches!(request.spec.network, SandboxNetworkPolicy::Disabled) {
-        bail!("AgentCore sandbox backend cannot enforce disabled networking");
-    }
-    Ok(())
-}
-
-fn agentcore_runtime_session_id(request: &SandboxRequest, spec_hash: &str) -> String {
-    let input = format!("{}\n{spec_hash}", request.key);
-    format!("exo-{}-session-0000000000", stable_fnv1a_hex(&input))
-}
-
-fn stable_fnv1a_hex(input: &str) -> String {
-    let mut hash = 0xcbf29ce484222325u64;
-    for byte in input.as_bytes() {
-        hash ^= u64::from(*byte);
-        hash = hash.wrapping_mul(0x100000001b3);
-    }
-    format!("{hash:016x}")
-}
-
-fn duration_to_millis(duration: std::time::Duration) -> u64 {
-    duration.as_millis().min(u128::from(u64::MAX)) as u64
-}
-
-#[derive(Debug, Serialize)]
-struct AgentCoreExecRequest {
-    #[serde(rename = "type")]
-    request_type: &'static str,
-    argv: Vec<String>,
-    env: std::collections::HashMap<String, String>,
-    cwd: String,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    timeout_ms: Option<u64>,
-}
-
-#[derive(Debug, Serialize)]
-struct AgentCoreStartProcessRequest {
-    #[serde(rename = "type")]
-    request_type: &'static str,
-    argv: Vec<String>,
-    env: std::collections::HashMap<String, String>,
-    cwd: String,
-}
-
-#[derive(Debug, Deserialize)]
-struct AgentCoreStartProcessResponse {
-    process_id: String,
-}
-
-#[derive(Debug, Serialize)]
-struct AgentCoreProcessBridgeRequest {
-    #[serde(rename = "type")]
-    request_type: &'static str,
-    process_id: String,
-    request: process_bridge::Request,
-}
-
-#[derive(Debug, Deserialize)]
-struct AgentCoreExecResponse {
-    #[serde(default)]
-    ok: Option<bool>,
-    #[serde(default)]
-    exit_code: Option<i32>,
-    #[serde(default)]
-    stdout: Option<String>,
-    #[serde(default)]
-    stderr: Option<String>,
-    #[serde(default)]
-    cwd: Option<String>,
-    #[serde(default)]
-    error: Option<String>,
-}
diff --git a/crates/exoharness/src/sandbox_provider/mod.rs b/crates/exoharness/src/sandbox_provider/mod.rs
index a876438..15bcd63 100644
--- a/crates/exoharness/src/sandbox_provider/mod.rs
+++ b/crates/exoharness/src/sandbox_provider/mod.rs
@@ -10,22 +10,6 @@ mod daytona {
         "daytonaio/sandbox:0.8.0".to_string()
     }
 }
-#[cfg(all(
-    not(target_arch = "wasm32"),
-    feature = "basic-backend",
-    feature = "aws-agentcore"
-))]
-mod aws_agentcore;
-#[cfg(not(all(
-    not(target_arch = "wasm32"),
-    feature = "basic-backend",
-    feature = "aws-agentcore"
-)))]
-mod aws_agentcore {
-    pub fn default_aws_agentcore_image() -> String {
-        String::new()
-    }
-}
 #[cfg(all(not(target_arch = "wasm32"), feature = "basic-backend"))]
 mod process_bridge;
 #[cfg(all(not(target_arch = "wasm32"), feature = "basic-backend"))]
@@ -37,13 +21,6 @@ mod vercel {
     }
 }
 
-pub use aws_agentcore::default_aws_agentcore_image;
-#[cfg(all(
-    not(target_arch = "wasm32"),
-    feature = "basic-backend",
-    feature = "aws-agentcore"
-))]
-pub use aws_agentcore::{AwsAgentCoreConfig, AwsAgentCoreCredentials, AwsAgentCoreSandboxBackend};
 pub use daytona::default_daytona_image;
 #[cfg(all(not(target_arch = "wasm32"), feature = "basic-backend"))]
 pub use daytona::{
diff --git a/crates/exoharness/src/server.rs b/crates/exoharness/src/server.rs
index 84eb7a2..cfa0c0e 100644
--- a/crates/exoharness/src/server.rs
+++ b/crates/exoharness/src/server.rs
@@ -5,8 +5,8 @@ use tokio::io::{AsyncBufReadExt, AsyncRead, AsyncWrite, AsyncWriteExt, BufReader
 
 use crate::protocol::{ClientMessage, ConversationHandleInfo, Request, Response, ServerMessage};
 use crate::{
-    AgentHandle, AgentId, ConversationHandle, ConversationId, ExoHarness, ListConversationsResult,
-    Result, SessionId, TurnHandle, TurnId, TurnRecord,
+    AgentHandle, AgentId, ConversationHandle, ConversationId, ExoHarness, Result, SessionId,
+    TurnHandle, TurnId, TurnRecord,
 };
 
 pub struct ExoHarnessServer {
@@ -63,21 +63,18 @@ impl ExoHarnessServer {
             Request::GetSecret { secret_id } => Ok(Response::Secret {
                 secret: self.root.get_secret(&secret_id).await?,
             }),
-            Request::ListConversations { agent_id, request } => {
+            Request::ListConversations { agent_id } => {
                 let agent = self.require_agent(&agent_id).await?;
-                let result = agent.list_conversations(request).await?;
                 Ok(Response::Conversations {
-                    result: ListConversationsResult {
-                        conversations: result
-                            .conversations
-                            .into_iter()
-                            .map(|conversation| ConversationHandleInfo {
-                                agent_id,
-                                record: conversation.record().clone(),
-                            })
-                            .collect(),
-                        next_cursor: result.next_cursor,
-                    },
+                    conversations: agent
+                        .list_conversations()
+                        .await?
+                        .into_iter()
+                        .map(|conversation| ConversationHandleInfo {
+                            agent_id,
+                            record: conversation.record().clone(),
+                        })
+                        .collect(),
                 })
             }
             Request::GetConversation {
diff --git a/crates/exoharness/src/types.rs b/crates/exoharness/src/types.rs
index 9061519..fd6ce6d 100644
--- a/crates/exoharness/src/types.rs
+++ b/crates/exoharness/src/types.rs
@@ -37,10 +37,7 @@ pub trait ExoHarness: Send + Sync {
 pub trait AgentHandle: Send + Sync {
     fn record(&self) -> &AgentRecord;
 
-    async fn list_conversations(
-        &self,
-        request: ListConversationsRequest,
-    ) -> Result<ListConversationsResult<Arc<dyn ConversationHandle>>>;
+    async fn list_conversations(&self) -> Result<Vec<Arc<dyn ConversationHandle>>>;
     async fn get_conversation(
         &self,
         id: &ConversationId,
@@ -162,18 +159,6 @@ pub struct NewConversationRequest {
     pub name: Option<String>,
 }
 
-#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
-pub struct ListConversationsRequest {
-    pub cursor: Option<EventId>,
-    pub limit: Option<usize>,
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
-pub struct ListConversationsResult<T> {
-    pub conversations: Vec<T>,
-    pub next_cursor: Option<EventId>,
-}
-
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
 pub struct TurnRecord {
     pub id: TurnId,
@@ -277,6 +262,31 @@ pub struct ForkConversationRequest {
     pub name: Option<String>,
 }
 
+#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
+pub struct UsageRecord {
+    pub model: String,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub prompt_tokens: Option<i64>,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub completion_tokens: Option<i64>,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub prompt_cached_tokens: Option<i64>,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub prompt_cache_creation_tokens: Option<i64>,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub completion_reasoning_tokens: Option<i64>,
+    /// Cost in USD, computed at call time from the price table baked into
+    /// this binary. `None` if the model is not in the table.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub cost_usd: Option<f64>,
+    /// Time to first token (streaming only).
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub ttft_ms: Option<u64>,
+    /// Wall-clock duration from request start to end of response.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub duration_ms: Option<u64>,
+}
+
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct Event {
     pub id: EventId,
@@ -310,6 +320,11 @@ pub enum EventData {
     Messages {
         messages: Vec<Message>,
         response_id: Option<ResponseId>,
+        // Boxed to keep `EventData` small: `UsageRecord` is ~170 bytes and
+        // most events don't carry it, so inlining it would bloat every
+        // variant (and every enum that embeds `EventData`).
+        #[serde(default, skip_serializing_if = "Option::is_none")]
+        usage: Option<Box<UsageRecord>>,
     },
     ToolRequested {
         tool_call_id: ToolCallId,
@@ -483,8 +498,6 @@ pub enum SandboxProvider {
     #[default]
     Daytona,
     Vercel,
-    #[serde(rename = "aws_agentcore", alias = "aws-agentcore")]
-    AwsAgentCore,
     AppleContainer,
     Docker,
     #[serde(alias = "local")]
@@ -503,7 +516,6 @@ impl SandboxProvider {
         match self {
             Self::Daytona => "daytona",
             Self::Vercel => "vercel",
-            Self::AwsAgentCore => "aws-agentcore",
             Self::AppleContainer => "apple-container",
             Self::Docker => "docker",
             Self::LocalProcess => "local-process",
@@ -524,7 +536,6 @@ impl FromStr for SandboxProvider {
         match value.trim() {
             "daytona" => Ok(Self::Daytona),
             "vercel" => Ok(Self::Vercel),
-            "aws-agentcore" | "aws_agentcore" => Ok(Self::AwsAgentCore),
             "apple-container" | "apple_container" => Ok(Self::AppleContainer),
             "docker" => Ok(Self::Docker),
             "local" | "local-process" | "local_process" => Ok(Self::LocalProcess),
@@ -795,17 +806,6 @@ pub enum SandboxProviderConfig {
         #[serde(default = "crate::sandbox_provider::default_vercel_image")]
         default_image: String,
     },
-    #[serde(rename = "aws_agentcore", alias = "aws-agentcore")]
-    AwsAgentCore {
-        runtime_arn: String,
-        region: String,
-        #[serde(default, skip_serializing_if = "Option::is_none")]
-        qualifier: Option<String>,
-        #[serde(default, skip_serializing_if = "Option::is_none")]
-        endpoint_url: Option<String>,
-        #[serde(default = "crate::sandbox_provider::default_aws_agentcore_image")]
-        default_image: String,
-    },
 }
 
 impl SandboxProviderConfig {
@@ -814,7 +814,6 @@ impl SandboxProviderConfig {
             Self::Daytona { .. } => SandboxProvider::Daytona,
             Self::Vercel { .. } => SandboxProvider::Vercel,
             Self::Docker { .. } => SandboxProvider::Docker,
-            Self::AwsAgentCore { .. } => SandboxProvider::AwsAgentCore,
         }
     }
 
@@ -823,8 +822,7 @@ impl SandboxProviderConfig {
         match self {
             Self::Daytona { default_image, .. }
             | Self::Vercel { default_image, .. }
-            | Self::Docker { default_image, .. }
-            | Self::AwsAgentCore { default_image, .. } => default_image,
+            | Self::Docker { default_image, .. } => default_image,
         }
     }
 }
@@ -882,6 +880,51 @@ mod tests {
         );
     }
 
+    #[test]
+    fn messages_event_deserializes_without_usage_field() {
+        // Older logs predate per-message cost tracking and have no `usage`
+        // field on Messages events; serde(default) must keep them readable.
+        let json = serde_json::json!({
+            "type": "messages",
+            "messages": [],
+            "response_id": null,
+        });
+        let event: EventData = serde_json::from_value(json).expect("legacy event should parse");
+        match event {
+            EventData::Messages { usage, .. } => assert!(usage.is_none()),
+            _ => panic!("expected Messages variant"),
+        }
+    }
+
+    #[test]
+    fn messages_event_serializes_usage_when_present() {
+        let event = EventData::Messages {
+            messages: vec![],
+            response_id: None,
+            usage: Some(Box::new(UsageRecord {
+                model: "claude-sonnet-4-6".to_string(),
+                prompt_tokens: Some(100),
+                completion_tokens: Some(50),
+                cost_usd: Some(0.001),
+                ..Default::default()
+            })),
+        };
+        let value = serde_json::to_value(&event).expect("event should serialize");
+        let usage = value.get("usage").expect("usage field present");
+        assert_eq!(
+            usage.get("model").and_then(Value::as_str),
+            Some("claude-sonnet-4-6")
+        );
+        assert_eq!(
+            usage.get("prompt_tokens").and_then(Value::as_i64),
+            Some(100)
+        );
+        assert!((usage.get("cost_usd").and_then(Value::as_f64).unwrap() - 0.001).abs() < 1e-9);
+        // Round-trip back through the legacy-event parser
+        let parsed: EventData = serde_json::from_value(value).expect("round-trip parse");
+        assert!(matches!(parsed, EventData::Messages { usage: Some(_), .. }));
+    }
+
     #[test]
     fn serializes_mount_modes_as_ro_rw() {
         let ro =
@@ -910,13 +953,11 @@ mod tests {
             "vercel".parse::<SandboxProvider>().unwrap(),
             SandboxProvider::Vercel
         );
-        assert!("agentcore".parse::<SandboxProvider>().is_err());
         assert_eq!(
             SandboxProvider::AppleContainer.to_string(),
             "apple-container"
         );
         assert_eq!(SandboxProvider::Vercel.to_string(), "vercel");
-        assert_eq!(SandboxProvider::AwsAgentCore.to_string(), "aws-agentcore");
         assert_eq!(SandboxProvider::LocalProcess.to_string(), "local-process");
     }
 }
diff --git a/docs/cost-tracking-design.md b/docs/cost-tracking-design.md
new file mode 100644
index 0000000..57ff2b9
--- /dev/null
+++ b/docs/cost-tracking-design.md
@@ -0,0 +1,180 @@
+# Per-Message Cost, Token, and Latency Tracking
+
+Every LLM response in exo carries a durable, per-message record of token usage
+and timing in the canonical event log. **Dollar cost is a policy computed in
+userspace, not by the trusted substrate** — the store persists whatever numbers
+the event carries. Cost computation is an agent-side policy, so should not live
+in the exoharness.
+
+This document describes what is recorded, who computes cost and where, and the
+reasoning behind that split.
+
+## Design Principles
+
+- **Cost is policy, and policy lives in userspace.** How to turn tokens into
+  dollars (which price table, which provider formula, when to compute) is an
+  application decision, not a substrate responsibility. To not force each substrate
+  to reimplement, we provide a standalone cost library that executors call.
+- **The substrate stays minimal.** `exoharness` stores the usage record verbatim,
+  including an optional `cost_usd` field, but contains no pricing code and never
+  computes, validates, or owns cost. It round-trips bytes.
+- **Usage is agent-reported telemetry, not an attested ledger.** The token counts
+  come from the provider response, which for the TypeScript/exoclaw path is read
+  _inside the agent-side harness process_ (the model call is not made by the
+  trusted Rust core — there is no model-completion runtime request). So usage is
+  self-reported wherever cost is later computed. The implication is that it cannot
+  be fully trustworthy: the agent owns its own calls so can report whatever numbers
+  it wants.
+- **A maintained data source, not a hand-rolled table.** Prices change often and
+  across many providers, so the _data_ is the community-maintained LiteLLM price
+  database — one source of truth, shared by every consumer of the cost library.
+- **Backward and forward compatible.** The usage record and all sub-fields are
+  optional; legacy events without `usage` still deserialize, and `cost_usd` may
+  simply be null when no policy filled it.
+
+## What Is Recorded
+
+`EventData::Messages` gains an optional `usage` field holding a `UsageRecord`
+(the type is event schema and lives in `exoharness`; the store treats it as data):
+
+| Field                          | Meaning                                                                |
+| ------------------------------ | ---------------------------------------------------------------------- |
+| `model`                        | Model id echoed by the provider, falling back to the requested binding |
+| `prompt_tokens`                | Input tokens (provider convention varies — see per-provider math)      |
+| `completion_tokens`            | Output tokens                                                          |
+| `prompt_cached_tokens`         | Cache-read (discounted) input tokens                                   |
+| `prompt_cache_creation_tokens` | Cache-write input tokens (Anthropic)                                   |
+| `completion_reasoning_tokens`  | Reasoning tokens, when surfaced                                        |
+| `cost_usd`                     | USD cost filled by userspace policy; null if no policy computed it     |
+| `ttft_ms`                      | Time to first token (streaming path only)                              |
+| `duration_ms`                  | Wall-clock duration, request start to end of response                  |
+
+Every sub-field is `Option` with `skip_serializing_if`. The record is boxed
+inside `EventData` (`Option<Box<UsageRecord>>`) to keep the enum small and dodge
+`large_enum_variant`; `Box` is serde-transparent, so the on-disk JSON is
+unchanged. The two cache buckets are both kept because reads and writes bill at
+different rates — cost can't be reconstructed from a single collapsed number.
+They mirror lingua's `UniversalUsage` field names.
+
+## Layering: Who Computes Cost
+
+```
+TRUSTED SUBSTRATE                    USERSPACE (agent-side)
+─────────────────                    ──────────────────────
+exoharness store                     Rust executors (Basic/RLM) ─┐
+  • persists UsageRecord verbatim                                 ├─ cost library
+  • raw tokens + model + timing      TS harness / exoclaw ───────┘   (policy)
+  • cost_usd is just a field;                  │
+    NOTHING here computes it                   └─ fills cost_usd when building the
+  • no pricing dependency                         messages event (or leaves it null
+                                                  for a reader to compute later)
+```
+
+- The emitter (whoever made the call and holds the provider usage) builds the
+  `UsageRecord` and, as a policy choice, fills `cost_usd` by calling the cost
+  library. Rust executors call the Rust crate; the TS harness calls the TS port.
+- The substrate stores the record as-is. It has no pricing code and no dependency
+  on the cost library.
+- _When_ cost is computed is an application choice: at write (emitter fills it) or
+  left null for a read-time/report path. The substrate is agnostic either way.
+
+## The Cost Library
+
+There are two userspaces (Rust executors, the TS harness), so the cost library
+exists once per language. Each is **self-contained** — it owns both the math and
+its own data loading, and neither depends on the other having run:
+
+- **Rust:** a `cost` crate (price table, lookup, `compute_cost_usd`, loader), used
+  by the Basic executor. The CLI loads the table once at startup (`--pricing-path`
+  / `--pricing-url`, `EXO_LITELLM_PRICES_*` as env) and injects it. `exoharness`
+  does not depend on it.
+- **TypeScript:** a self-contained port (`@exo/model-runtime/cost`) used by the
+  exoclaw harness. It loads its own data through the harness's normal config flow
+  — reading `EXO_LITELLM_PRICES_PATH` from its inherited env, then its own cache,
+  then its own fetch — and computes cost when building the messages event. It does
+  **not** depend on the Rust loader having populated anything.
+
+Only the price _rates_ are a single source of truth (the upstream LiteLLM JSON);
+the short per-provider formula and the loader logic are duplicated per language.
+That duplication is the accepted cost of keeping cost a per-userspace policy
+rather than a substrate service, and of each userspace owning its own data so
+there is no cross-boundary coupling.
+
+### Loading the data
+
+Each loader resolves the table once and holds it as plain data, in this order:
+explicit `EXO_LITELLM_PRICES_PATH` (or `--pricing-path` on the CLI) → fresh
+on-disk cache (`$XDG_CACHE_HOME/exo/litellm_prices.json`, 24h TTL) → HTTP fetch
+(`EXO_LITELLM_PRICES_URL` or the LiteLLM default, cached on success) → stale cache
+→ none. A corrupt cache or unparseable fetch degrades to no table (cost stays
+null, tokens persist); the cache is written only when a fetched body parses. The
+two loaders share the same cache _path_ by convention, but neither requires the
+other to have written it.
+
+### Per-provider cost math
+
+Providers disagree on what `prompt_tokens` _includes_, and getting it wrong
+distorts cost by up to ~10× on cache-heavy requests:
+
+- **Anthropic-family** (`anthropic`, `vertex_ai-anthropic`, `azure_ai`):
+  `prompt_tokens` is **fresh** input only; cache reads and writes are separate and
+  additive — `prompt·in + cached·cache_read + cache_creation·cache_write + completion·out`.
+- **Everything else** (`openai`, `mistral`, Bedrock, …): `prompt_tokens` is the
+  **total** input; cached is a subset to subtract first —
+  `(prompt − cached)·in + cached·cache_read + completion·out`.
+
+The additive set is a narrow list of first-party Anthropic providers; everything
+else, including all of Bedrock, is treated as inclusive. Bedrock is not fully
+handled — Bedrock-hosted Claude is really additive, so its cache-heavy costs are
+off. Left as a TODO. Unclassified providers default to inclusive (safe when
+`cached == 0`, the common case).
+
+### Model lookup
+
+Exact match first, then the longest entry key that is a prefix of the requested
+model **at a token boundary** — the next character must be absent or a separator
+(`-` / `:`). This resolves dated revisions (`claude-sonnet-4-6-20251022` →
+`claude-sonnet-4-6`) while refusing to slide `gpt-4o-mini` onto a `gpt-4` entry
+when `gpt-4o` is missing, so a model is never silently priced at a neighbor's
+rate.
+
+## Testing
+
+- Cost library (Rust) unit tests: Anthropic additive (no cache / hits / creation),
+  inclusive (with/without cache, subtraction asserted), boundary-aware lookup
+  (incl. the `gpt-4o-mini` vs `gpt-4` non-match), `sample_spec` skip,
+  unknown-model null, provider classification (Bedrock → inclusive), and the
+  corrupt-cache-degrades-to-empty loader path.
+- TS cost port: a parity unit test over the same fixtures so the two formulas
+  stay in agreement.
+- Executors: assert the persisted `Messages` event carries the expected cost for
+  the Anthropic and inclusive paths, through a Rust executor and through the
+  exoclaw/TS path, so coverage is pinned on both userspaces.
+- `server_duration_ms` removed; the legacy-no-`usage` backward-compat test stays.
+
+## Not in Scope (Intentional)
+
+- **Trusted/attested usage.** Usage is agent-reported (see principles); making it
+  tamper-evident means routing model calls through the Rust core — a separate
+  change.
+- **`/cost` REPL surface.** Data is persisted; the UI is a follow-up. (It is one
+  natural read-time consumer of the cost library.)
+- **Cache-tier resolution.** lingua's `UniversalUsage` collapses Anthropic's
+  5-minute and 1-hour cache writes, so cost uses the 5-minute rate;
+  `cache_creation_input_token_cost_above_1hr` is parsed but unused.
+- **Long-running staleness.** The data loads once at startup; a long-lived service
+  would need a periodic refresh.
+- **Anthropic aggregate Admin API.** Org-level, aggregate-only; can't be tied to a
+  message.
+
+## Resolved Review Decisions
+
+All three review points are folded in:
+
+1. **Prefix-match boundary** (Alexsun1one) — adopted; lookup is boundary-aware.
+2. **Bedrock classification** (Alexsun1one) — Bedrock is treated as inclusive for
+   now; correct additive handling for Bedrock-hosted Claude is left as a TODO.
+3. **Env vars through clap** (ankrgyl) — the cost library loads at startup, so its
+   source is a CLI/clap-parsed input threaded in, with the `EXO_LITELLM_PRICES_*`
+   env vars kept as overrides. (Writing the "env vars via clap" convention up as a
+   repo skill remains a separate housekeeping task.)
diff --git a/examples/typescript/turn-loop.ts b/examples/typescript/turn-loop.ts
index 79a175a..d0f5d22 100644
--- a/examples/typescript/turn-loop.ts
+++ b/examples/typescript/turn-loop.ts
@@ -19,6 +19,7 @@ import {
   type ResponsesRuntimeLike,
   type TraceParent,
 } from "@exo/model-runtime/responses";
+import { ensureTable } from "@exo/model-runtime/cost";
 
 import { resolveLlmBinding } from "./shared";
 
@@ -34,6 +35,7 @@ export async function runResponsesHarnessTurn(
   context: TurnContext,
   options: ResponsesTurnLoopOptions = {},
 ): Promise<void> {
+  await ensureTable(); // load the price table once so cost is ready when events are built
   const modelBinding = await resolveLlmBinding(context);
   const runtime = runtimeFromModelBinding(context.agentConfig, modelBinding);
   await runtime.runTurn(context, (turnParent) =>
diff --git a/tsconfig.json b/tsconfig.json
index 452c477..1d14f24 100644
--- a/tsconfig.json
+++ b/tsconfig.json
@@ -16,6 +16,7 @@
       "@exo/model-runtime/responses": [
         "./typescript/model-runtime/responses.ts"
       ],
+      "@exo/model-runtime/cost": ["./typescript/model-runtime/cost.ts"],
       "@exo/codex/app-server": ["./typescript/codex/app-server.ts"],
       "@exo/cursor/sdk": ["./typescript/cursor/sdk.ts"],
       "@exo/cursor/protocol": ["./typescript/cursor/protocol.ts"]
diff --git a/typescript/harness/index.ts b/typescript/harness/index.ts
index 45847f6..2261299 100644
--- a/typescript/harness/index.ts
+++ b/typescript/harness/index.ts
@@ -437,11 +437,13 @@ export function assistantTextMessage(text: string): Message {
 export function messagesEvent(
   messages: Message[],
   responseId?: string,
+  usage?: JsonObject,
 ): EventData {
   return {
     type: "messages",
     messages,
     response_id: responseId,
+    ...(usage ? { usage } : {}),
   };
 }
 
diff --git a/typescript/model-runtime/cost.test.ts b/typescript/model-runtime/cost.test.ts
new file mode 100644
index 0000000..a31b751
--- /dev/null
+++ b/typescript/model-runtime/cost.test.ts
@@ -0,0 +1,75 @@
+import { describe, expect, it } from "vitest";
+
+import { computeCostUsd, lookup, parseTable } from "./cost";
+
+const FIXTURE = `{
+  "sample_spec": { "comment": "ignored" },
+  "claude-sonnet-4-6": {
+    "litellm_provider": "anthropic", "input_cost_per_token": 3e-06,
+    "output_cost_per_token": 1.5e-05, "cache_read_input_token_cost": 3e-07,
+    "cache_creation_input_token_cost": 3.75e-06
+  },
+  "gpt-4o-mini": {
+    "litellm_provider": "openai", "input_cost_per_token": 1.5e-07,
+    "output_cost_per_token": 6e-07, "cache_read_input_token_cost": 7.5e-08
+  },
+  "gpt-4": { "litellm_provider": "openai", "input_cost_per_token": 3e-05 },
+  "us.anthropic.claude-sonnet-4-6": {
+    "litellm_provider": "bedrock_converse", "input_cost_per_token": 3.3e-06,
+    "output_cost_per_token": 1.65e-05, "cache_read_input_token_cost": 3.3e-07
+  }
+}`;
+
+const table = parseTable(FIXTURE);
+
+describe("cost", () => {
+  it("skips sample_spec", () => {
+    expect(table.size).toBe(4);
+  });
+
+  it("resolves dated revisions, not neighbors", () => {
+    expect(lookup(table, "claude-sonnet-4-6-20251022")?.litellm_provider).toBe(
+      "anthropic",
+    );
+    expect(lookup(table, "gpt-4o")).toBeUndefined();
+    expect(lookup(table, "gpt-4-0613")).toBeDefined();
+  });
+
+  it("bills Anthropic additively (prompt excludes cached)", () => {
+    // 500 fresh + 10k read + 200 out = 0.0015 + 0.003 + 0.003
+    expect(
+      computeCostUsd(table, "claude-sonnet-4-6", {
+        prompt: 500,
+        completion: 200,
+        cached: 10_000,
+      }),
+    ).toBeCloseTo(0.0075, 12);
+  });
+
+  it("bills OpenAI inclusively (subtract cached)", () => {
+    expect(
+      computeCostUsd(table, "gpt-4o-mini", {
+        prompt: 2_000,
+        completion: 1_000,
+        cached: 500,
+      }),
+    ).toBeCloseTo(0.0008625, 12);
+  });
+
+  it("treats Bedrock as inclusive, not additive", () => {
+    const expected = 1_500 * 3.3e-6 + 500 * 3.3e-7 + 1_000 * 1.65e-5;
+    expect(
+      computeCostUsd(table, "us.anthropic.claude-sonnet-4-6", {
+        prompt: 2_000,
+        completion: 1_000,
+        cached: 500,
+      }),
+    ).toBeCloseTo(expected, 12);
+  });
+
+  it("returns null for unknown models", () => {
+    expect(
+      computeCostUsd(table, "acme-llm-9000", { prompt: 100, completion: 50 }),
+    ).toBeNull();
+  });
+});
diff --git a/typescript/model-runtime/cost.ts b/typescript/model-runtime/cost.ts
new file mode 100644
index 0000000..1ab3c00
--- /dev/null
+++ b/typescript/model-runtime/cost.ts
@@ -0,0 +1,184 @@
+// Cost policy for the TypeScript harness: a self-contained port of the `cost`
+// crate's math and loader. It owns its own price data (env override, on-disk
+// cache, or its own fetch) and does not depend on the Rust loader having run.
+
+import { mkdirSync, readFileSync, statSync, writeFileSync } from "node:fs";
+import { dirname } from "node:path";
+
+export type ModelEntry = {
+  litellm_provider?: string;
+  input_cost_per_token?: number;
+  output_cost_per_token?: number;
+  cache_read_input_token_cost?: number;
+  cache_creation_input_token_cost?: number;
+};
+
+export type PricingTable = Map<string, ModelEntry>;
+
+export type TokenCounts = {
+  prompt?: number;
+  completion?: number;
+  cached?: number;
+  cacheCreation?: number;
+};
+
+export function parseTable(json: string): PricingTable {
+  const raw = JSON.parse(json) as Record<string, unknown>;
+  const table: PricingTable = new Map();
+  for (const [key, value] of Object.entries(raw)) {
+    if (key !== "sample_spec" && value !== null && typeof value === "object") {
+      table.set(key, value as ModelEntry);
+    }
+  }
+  return table;
+}
+
+// Exact match, else the longest key that is a prefix of `model` at a token
+// boundary (next char absent or `-`/`:`).
+export function lookup(
+  table: PricingTable,
+  model: string,
+): ModelEntry | undefined {
+  const exact = table.get(model);
+  if (exact) return exact;
+  let best: ModelEntry | undefined;
+  let bestLen = -1;
+  for (const [key, entry] of table) {
+    const next = model[key.length];
+    if (
+      key.length > bestLen &&
+      model.startsWith(key) &&
+      (next === undefined || next === "-" || next === ":")
+    ) {
+      best = entry;
+      bestLen = key.length;
+    }
+  }
+  return best;
+}
+
+export function computeCostUsd(
+  table: PricingTable,
+  model: string,
+  tokens: TokenCounts,
+): number | null {
+  const entry = lookup(table, model);
+  if (!entry || entry.input_cost_per_token == null) return null;
+  const input = entry.input_cost_per_token;
+  const output = entry.output_cost_per_token ?? 0;
+  const cacheRead = entry.cache_read_input_token_cost ?? input;
+  const cacheWrite = entry.cache_creation_input_token_cost ?? input;
+
+  const prompt = Math.max(0, tokens.prompt ?? 0);
+  const completion = Math.max(0, tokens.completion ?? 0);
+  const cached = Math.max(0, tokens.cached ?? 0);
+  const created = Math.max(0, tokens.cacheCreation ?? 0);
+
+  // Anthropic-family `prompt_tokens` excludes cached (additive); else inclusive.
+  const fresh = isAdditive(entry.litellm_provider)
+    ? prompt
+    : Math.max(0, prompt - cached);
+  return (
+    fresh * input +
+    cached * cacheRead +
+    created * cacheWrite +
+    completion * output
+  );
+}
+
+function isAdditive(provider?: string): boolean {
+  return (
+    provider != null &&
+    (provider.startsWith("anthropic") ||
+      provider.startsWith("vertex_ai-anthropic") ||
+      provider === "azure_ai")
+  );
+}
+
+const DEFAULT_URL =
+  "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json";
+const CACHE_TTL_MS = 24 * 60 * 60 * 1000;
+const FETCH_TIMEOUT_MS = 5000;
+
+let table: PricingTable | null | undefined;
+let loading: Promise<PricingTable | null> | undefined;
+
+// The in-memory table once loaded; null until ensureTable() has resolved.
+export function getTable(): PricingTable | null {
+  return table ?? null;
+}
+
+// Loads the table once (memoized), owning its own data: env override -> fresh
+// cache -> own fetch (cached) -> stale cache -> null. Independent of Rust.
+export function ensureTable(): Promise<PricingTable | null> {
+  loading ??= load().then((loaded) => (table = loaded));
+  return loading;
+}
+
+async function load(): Promise<PricingTable | null> {
+  const override = process.env.EXO_LITELLM_PRICES_PATH;
+  if (override) return readTable(override);
+
+  const cache = cachePath();
+  if (cache && isFresh(cache)) {
+    const fresh = readTable(cache);
+    if (fresh) return fresh;
+  }
+  const fetched = await fetchTable(
+    process.env.EXO_LITELLM_PRICES_URL ?? DEFAULT_URL,
+  );
+  if (fetched) {
+    if (cache) writeCache(cache, fetched.body);
+    return fetched.table;
+  }
+  return cache ? readTable(cache) : null; // stale cache fallback
+}
+
+async function fetchTable(
+  url: string,
+): Promise<{ table: PricingTable; body: string } | null> {
+  const controller = new AbortController();
+  const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);
+  try {
+    const response = await fetch(url, { signal: controller.signal });
+    if (!response.ok) return null;
+    const body = await response.text();
+    return { table: parseTable(body), body };
+  } catch {
+    return null; // network/parse failure -> fall back to cache or null
+  } finally {
+    clearTimeout(timer);
+  }
+}
+
+function readTable(path: string): PricingTable | null {
+  try {
+    return parseTable(readFileSync(path, "utf8"));
+  } catch {
+    return null; // missing or unparseable -> no table
+  }
+}
+
+function isFresh(path: string): boolean {
+  try {
+    return Date.now() - statSync(path).mtimeMs < CACHE_TTL_MS;
+  } catch {
+    return false;
+  }
+}
+
+function writeCache(path: string, body: string): void {
+  try {
+    mkdirSync(dirname(path), { recursive: true });
+    writeFileSync(path, body);
+  } catch {
+    // best effort; a missing cache just means we re-fetch next time
+  }
+}
+
+function cachePath(): string | null {
+  const base =
+    process.env.XDG_CACHE_HOME ??
+    (process.env.HOME ? `${process.env.HOME}/.cache` : null);
+  return base ? `${base}/exo/litellm_prices.json` : null;
+}
diff --git a/typescript/model-runtime/responses.ts b/typescript/model-runtime/responses.ts
index 6ac23ab..c761adf 100644
--- a/typescript/model-runtime/responses.ts
+++ b/typescript/model-runtime/responses.ts
@@ -32,6 +32,7 @@ import {
   type ToolDefinition,
   type TurnContext,
 } from "../harness";
+import { computeCostUsd, getTable } from "./cost";
 import type {
   ChatCompletion,
   ChatCompletionChunk,
@@ -952,7 +953,7 @@ export function responseToLinguaEvents(response: Response): EventData[] {
   const events: EventData[] = [];
   const messages = responseMessages(response);
   if (messages.length > 0) {
-    events.push(messagesEvent(messages));
+    events.push(messagesEvent(messages, undefined, usageRecord(response)));
   }
   for (const result of responseToolCallResults(response)) {
     if (result.type === "tool_call") {
@@ -969,6 +970,29 @@ export function responseToLinguaEvents(response: Response): EventData[] {
   return events;
 }
 
+// Policy: attach raw usage + cost to the messages event. cost_usd is filled from
+// the shared price cache; left unset if the cache is unavailable.
+function usageRecord(response: Response): JsonObject | undefined {
+  const usage = response.usage;
+  if (!usage) return undefined;
+  const prompt = usage.input_tokens;
+  const completion = usage.output_tokens;
+  const cached = usage.input_tokens_details?.cached_tokens;
+  const reasoning = usage.output_tokens_details?.reasoning_tokens;
+  const table = getTable();
+  const cost = table
+    ? computeCostUsd(table, response.model, { prompt, completion, cached })
+    : null;
+
+  const record: JsonObject = { model: response.model };
+  if (prompt != null) record.prompt_tokens = prompt;
+  if (completion != null) record.completion_tokens = completion;
+  if (cached != null) record.prompt_cached_tokens = cached;
+  if (reasoning != null) record.completion_reasoning_tokens = reasoning;
+  if (cost != null) record.cost_usd = cost;
+  return record;
+}
+
 export function responseStreamEventToLinguaEvents(
   event: ResponseStreamEvent,
 ): EventData[] {