Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
164 changes: 131 additions & 33 deletions crates/tui/src/pricing.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
//! Cost estimation for DeepSeek API usage.
//! Cost estimation for API usage.
//!
//! Pricing based on DeepSeek's published rates (per million tokens).
//! Pricing is stored per million tokens. DeepSeek/Xiaomi MiMo rows include
//! their published CNY rates; OpenRouter-curated rows are USD-only.

#[cfg(test)]
use chrono::TimeZone;
Expand Down Expand Up @@ -32,7 +33,7 @@ impl CostCurrency {
}
}

/// Cost estimate in the two official DeepSeek pricing currencies.
/// Cost estimate in displayable currencies.
#[derive(Debug, Clone, Copy, Default, PartialEq)]
pub struct CostEstimate {
pub usd: f64,
Expand Down Expand Up @@ -98,29 +99,33 @@ struct CurrencyPricing {
output_per_million: f64,
}

/// Per-million-token pricing for a model in both official currencies.
/// Per-million-token pricing for a model.
#[derive(Debug, Clone, Copy)]
struct ModelPricing {
usd: CurrencyPricing,
cny: CurrencyPricing,
cny: Option<CurrencyPricing>,
}

/// Resolve the pricing row for `model` as of the current UTC time.
///
/// Thin wrapper that forwards to `pricing_for_model_at` with `Utc::now()` as
/// the reference instant and returns its result unchanged.
fn pricing_for_model(model: &str) -> Option<ModelPricing> {
    let now = Utc::now();
    pricing_for_model_at(model, now)
}

/// Report whether the pricing table can price `model`.
///
/// Returns `true` exactly when `pricing_for_model` resolves a row for the
/// given name, and `false` otherwise.
#[must_use]
pub fn has_pricing_for_model(model: &str) -> bool {
    let row = pricing_for_model(model);
    row.is_some()
}

fn pricing_for_model_at(model: &str, _now: DateTime<Utc>) -> Option<ModelPricing> {
let lower = model.to_lowercase();
if lower.starts_with("deepseek-ai/") {
// NVIDIA NIM-hosted DeepSeek uses NVIDIA's catalog/account terms, not
// DeepSeek Platform pricing. Avoid showing misleading DeepSeek costs.
return None;
}
match lower.as_str() {
"xiaomi/mimo-v2.5-pro" | "mimo-v2.5-pro" => return Some(deepseek_v4_pro_pricing()),
"xiaomi/mimo-v2.5" | "mimo-v2.5" => return Some(deepseek_v4_flash_pricing()),
_ => {}
if let Some(pricing) = known_pricing_for_model(&lower) {
return Some(pricing);
}
if lower.contains("deepseek") {
if lower.contains("v4-pro") || lower.contains("v4pro") {
Expand All @@ -136,18 +141,67 @@ fn pricing_for_model_at(model: &str, _now: DateTime<Utc>) -> Option<ModelPricing
}
}

fn known_pricing_for_model(model_lower: &str) -> Option<ModelPricing> {
match model_lower {
"xiaomi/mimo-v2.5-pro" | "mimo-v2.5-pro" => Some(deepseek_v4_pro_pricing()),
"xiaomi/mimo-v2.5" | "mimo-v2.5" => Some(deepseek_v4_flash_pricing()),

// USD rows below mirror the curated OpenRouter catalog. Prices are
// sourced from OpenRouter's per-token API fields and multiplied to the
// per-million-token units this module uses.
"moonshotai/kimi-k2.6" | "kimi-k2.6" => Some(usd_only_pricing(0.34, 0.68, 3.41)),
"z-ai/glm-5.1" | "glm-5.1" => Some(usd_only_pricing(0.182, 0.98, 3.08)),
"minimax/minimax-m3" | "minimax-m3" => Some(usd_only_pricing(0.06, 0.30, 1.20)),
"arcee-ai/trinity-large-thinking" | "trinity-large-thinking" => {
Some(usd_only_pricing(0.06, 0.22, 0.85))
}
"openai/gpt-5.5" | "gpt-5.5" => Some(usd_only_pricing(0.50, 5.00, 30.00)),
"openai/gpt-5.5-pro" | "gpt-5.5-pro" => Some(usd_only_pricing(30.00, 30.00, 180.00)),

"qwen/qwen3.6-flash" => Some(usd_only_pricing(0.1875, 0.1875, 1.125)),
"qwen/qwen3.6-35b-a3b" => Some(usd_only_pricing(0.05, 0.15, 1.00)),
"qwen/qwen3.6-max-preview" => Some(usd_only_pricing(1.04, 1.04, 6.24)),
"qwen/qwen3.6-27b" => Some(usd_only_pricing(0.2885, 0.2885, 3.17)),
"qwen/qwen3.6-plus" => Some(usd_only_pricing(0.325, 0.325, 1.95)),
"qwen/qwen3.7-max" => Some(usd_only_pricing(0.25, 1.25, 3.75)),

"google/gemma-4-31b-it" => Some(usd_only_pricing(0.09, 0.12, 0.35)),
"google/gemma-4-26b-a4b-it" => Some(usd_only_pricing(0.06, 0.06, 0.33)),
"tencent/hy3-preview" => Some(usd_only_pricing(0.021, 0.063, 0.21)),
Comment on lines +161 to +170

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Several newly added models (Qwen, Google Gemma, and Tencent) are only matched using their full provider-prefixed names (e.g., "qwen/qwen3.6-flash"). They lack the short aliases (e.g., "qwen3.6-flash") that other models like Kimi, GLM, and Minimax have. If a user configures or runs these models using their short names, the pricing lookup will fail, resulting in no pricing data being displayed.

Suggested change
"qwen/qwen3.6-flash" => Some(usd_only_pricing(0.1875, 0.1875, 1.125)),
"qwen/qwen3.6-35b-a3b" => Some(usd_only_pricing(0.05, 0.15, 1.00)),
"qwen/qwen3.6-max-preview" => Some(usd_only_pricing(1.04, 1.04, 6.24)),
"qwen/qwen3.6-27b" => Some(usd_only_pricing(0.2885, 0.2885, 3.17)),
"qwen/qwen3.6-plus" => Some(usd_only_pricing(0.325, 0.325, 1.95)),
"qwen/qwen3.7-max" => Some(usd_only_pricing(0.25, 1.25, 3.75)),
"google/gemma-4-31b-it" => Some(usd_only_pricing(0.09, 0.12, 0.35)),
"google/gemma-4-26b-a4b-it" => Some(usd_only_pricing(0.06, 0.06, 0.33)),
"tencent/hy3-preview" => Some(usd_only_pricing(0.021, 0.063, 0.21)),
"qwen/qwen3.6-flash" | "qwen3.6-flash" => Some(usd_only_pricing(0.1875, 0.1875, 1.125)),
"qwen/qwen3.6-35b-a3b" | "qwen3.6-35b-a3b" => Some(usd_only_pricing(0.05, 0.15, 1.00)),
"qwen/qwen3.6-max-preview" | "qwen3.6-max-preview" => Some(usd_only_pricing(1.04, 1.04, 6.24)),
"qwen/qwen3.6-27b" | "qwen3.6-27b" => Some(usd_only_pricing(0.2885, 0.2885, 3.17)),
"qwen/qwen3.6-plus" | "qwen3.6-plus" => Some(usd_only_pricing(0.325, 0.325, 1.95)),
"qwen/qwen3.7-max" | "qwen3.7-max" => Some(usd_only_pricing(0.25, 1.25, 3.75)),
"google/gemma-4-31b-it" | "gemma-4-31b-it" => Some(usd_only_pricing(0.09, 0.12, 0.35)),
"google/gemma-4-26b-a4b-it" | "gemma-4-26b-a4b-it" => Some(usd_only_pricing(0.06, 0.06, 0.33)),
"tencent/hy3-preview" | "hy3-preview" => Some(usd_only_pricing(0.021, 0.063, 0.21)),

"nvidia/nemotron-3-ultra-550b-a55b" | "nvidia/nemotron-3-ultra" => {
Some(usd_only_pricing(0.15, 0.50, 2.50))
}
_ => None,
}
}

/// Build a pricing row that carries USD rates only.
///
/// All three arguments are USD prices per million tokens: cached input,
/// uncached input, and output. The CNY side of the returned row is `None`.
fn usd_only_pricing(
    input_cache_hit_per_million: f64,
    input_cache_miss_per_million: f64,
    output_per_million: f64,
) -> ModelPricing {
    let usd = CurrencyPricing {
        input_cache_hit_per_million,
        input_cache_miss_per_million,
        output_per_million,
    };
    ModelPricing { usd, cny: None }
}

fn deepseek_v4_pro_pricing() -> ModelPricing {
ModelPricing {
usd: CurrencyPricing {
input_cache_hit_per_million: 0.003625,
input_cache_miss_per_million: 0.435,
output_per_million: 0.87,
},
cny: CurrencyPricing {
cny: Some(CurrencyPricing {
input_cache_hit_per_million: 0.025,
input_cache_miss_per_million: 3.0,
output_per_million: 6.0,
},
}),
}
}

Expand All @@ -158,11 +212,11 @@ fn deepseek_v4_flash_pricing() -> ModelPricing {
input_cache_miss_per_million: 0.14,
output_per_million: 0.28,
},
cny: CurrencyPricing {
cny: Some(CurrencyPricing {
input_cache_hit_per_million: 0.02,
input_cache_miss_per_million: 1.0,
output_per_million: 2.0,
},
}),
}
}

Expand Down Expand Up @@ -201,7 +255,10 @@ pub fn calculate_turn_cost_estimate(
let pricing = pricing_for_model(model)?;
Some(CostEstimate {
usd: calculate_turn_cost_with_pricing(pricing.usd, input_tokens, output_tokens),
cny: calculate_turn_cost_with_pricing(pricing.cny, input_tokens, output_tokens),
cny: pricing
.cny
.map(|pricing| calculate_turn_cost_with_pricing(pricing, input_tokens, output_tokens))
.unwrap_or(0.0),
})
}

Expand All @@ -227,7 +284,10 @@ pub fn calculate_turn_cost_estimate_from_usage(model: &str, usage: &Usage) -> Op
let pricing = pricing_for_model(model)?;
Some(CostEstimate {
usd: calculate_turn_cost_from_usage_with_pricing(pricing.usd, usage),
cny: calculate_turn_cost_from_usage_with_pricing(pricing.cny, usage),
cny: pricing
.cny
.map(|pricing| calculate_turn_cost_from_usage_with_pricing(pricing, usage))
.unwrap_or(0.0),
})
}

Expand Down Expand Up @@ -262,8 +322,13 @@ pub fn calculate_cache_savings(model: &str, cache_hit_tokens: u32) -> Option<Cos
Some(CostEstimate {
usd: tokens
* (pricing.usd.input_cache_miss_per_million - pricing.usd.input_cache_hit_per_million),
cny: tokens
* (pricing.cny.input_cache_miss_per_million - pricing.cny.input_cache_hit_per_million),
cny: pricing
.cny
.map(|pricing| {
tokens
* (pricing.input_cache_miss_per_million - pricing.input_cache_hit_per_million)
})
.unwrap_or(0.0),
})
}

Expand Down Expand Up @@ -326,7 +391,35 @@ mod tests {
#[test]
fn input_cost_note_unknown_model_returns_none() {
    // Neither name is expected to produce an input-cost note.
    let local_model_note = input_cost_note("llama3.3:70b");
    assert!(local_model_note.is_none());

    let curated_model_note = input_cost_note("moonshotai/kimi-k2.6");
    assert!(curated_model_note.is_none());
}

#[test]
fn curated_usd_only_models_have_pricing_and_accrue_cost() {
    // One million input tokens (250k cache hits, 750k misses) plus half a
    // million output tokens.
    let usage = Usage {
        prompt_cache_hit_tokens: Some(250_000),
        prompt_cache_miss_tokens: Some(750_000),
        input_tokens: 1_000_000,
        output_tokens: 500_000,
        ..Default::default()
    };

    // (model name, cache-hit, cache-miss, output) in USD per million tokens.
    let expected_rows = [
        ("kimi-k2.6", 0.34, 0.68, 3.41),
        ("z-ai/glm-5.1", 0.182, 0.98, 3.08),
        ("qwen/qwen3.6-plus", 0.325, 0.325, 1.95),
        ("trinity-large-thinking", 0.06, 0.22, 0.85),
        ("gpt-5.5", 0.50, 5.00, 30.00),
    ];

    for (model, hit, miss, output) in expected_rows {
        let now = Utc::now();
        let pricing = pricing_for_model_at(model, now).expect(model);

        // The table row must carry exactly the expected USD rates and no CNY side.
        let usd = pricing.usd;
        assert_eq!(usd.input_cache_hit_per_million, hit);
        assert_eq!(usd.input_cache_miss_per_million, miss);
        assert_eq!(usd.output_per_million, output);
        assert!(pricing.cny.is_none());
        assert!(has_pricing_for_model(model));

        // Cost accrues in USD; the CNY figure stays at zero.
        let estimate = calculate_turn_cost_estimate_from_usage(model, &usage).expect(model);
        assert!(estimate.usd > 0.0, "expected positive USD for {model}");
        assert_eq!(estimate.cny, 0.0);
    }
}

#[test]
Expand All @@ -340,9 +433,10 @@ mod tests {
assert_eq!(pricing.usd.input_cache_hit_per_million, 0.003625);
assert_eq!(pricing.usd.input_cache_miss_per_million, 0.435);
assert_eq!(pricing.usd.output_per_million, 0.87);
assert_eq!(pricing.cny.input_cache_hit_per_million, 0.025);
assert_eq!(pricing.cny.input_cache_miss_per_million, 3.0);
assert_eq!(pricing.cny.output_per_million, 6.0);
let cny = pricing.cny.expect("DeepSeek pricing has CNY");
assert_eq!(cny.input_cache_hit_per_million, 0.025);
assert_eq!(cny.input_cache_miss_per_million, 3.0);
assert_eq!(cny.output_per_million, 6.0);
}

#[test]
Expand All @@ -353,9 +447,10 @@ mod tests {
assert_eq!(pricing.usd.input_cache_hit_per_million, 0.003625);
assert_eq!(pricing.usd.input_cache_miss_per_million, 0.435);
assert_eq!(pricing.usd.output_per_million, 0.87);
assert_eq!(pricing.cny.input_cache_hit_per_million, 0.025);
assert_eq!(pricing.cny.input_cache_miss_per_million, 3.0);
assert_eq!(pricing.cny.output_per_million, 6.0);
let cny = pricing.cny.expect("DeepSeek pricing has CNY");
assert_eq!(cny.input_cache_hit_per_million, 0.025);
assert_eq!(cny.input_cache_miss_per_million, 3.0);
assert_eq!(cny.output_per_million, 6.0);
}

#[test]
Expand All @@ -378,9 +473,10 @@ mod tests {
assert_eq!(pricing.usd.input_cache_hit_per_million, 0.0028);
assert_eq!(pricing.usd.input_cache_miss_per_million, 0.14);
assert_eq!(pricing.usd.output_per_million, 0.28);
assert_eq!(pricing.cny.input_cache_hit_per_million, 0.02);
assert_eq!(pricing.cny.input_cache_miss_per_million, 1.0);
assert_eq!(pricing.cny.output_per_million, 2.0);
let cny = pricing.cny.expect("DeepSeek pricing has CNY");
assert_eq!(cny.input_cache_hit_per_million, 0.02);
assert_eq!(cny.input_cache_miss_per_million, 1.0);
assert_eq!(cny.output_per_million, 2.0);
}

#[test]
Expand All @@ -391,17 +487,19 @@ mod tests {
assert_eq!(pro_pricing.usd.input_cache_hit_per_million, 0.003625);
assert_eq!(pro_pricing.usd.input_cache_miss_per_million, 0.435);
assert_eq!(pro_pricing.usd.output_per_million, 0.87);
assert_eq!(pro_pricing.cny.input_cache_hit_per_million, 0.025);
assert_eq!(pro_pricing.cny.input_cache_miss_per_million, 3.0);
assert_eq!(pro_pricing.cny.output_per_million, 6.0);
let pro_cny = pro_pricing.cny.expect("MiMo pricing has CNY");
assert_eq!(pro_cny.input_cache_hit_per_million, 0.025);
assert_eq!(pro_cny.input_cache_miss_per_million, 3.0);
assert_eq!(pro_cny.output_per_million, 6.0);

let flash_pricing = pricing_for_model_at("xiaomi/mimo-v2.5", now).unwrap();
assert_eq!(flash_pricing.usd.input_cache_hit_per_million, 0.0028);
assert_eq!(flash_pricing.usd.input_cache_miss_per_million, 0.14);
assert_eq!(flash_pricing.usd.output_per_million, 0.28);
assert_eq!(flash_pricing.cny.input_cache_hit_per_million, 0.02);
assert_eq!(flash_pricing.cny.input_cache_miss_per_million, 1.0);
assert_eq!(flash_pricing.cny.output_per_million, 2.0);
let flash_cny = flash_pricing.cny.expect("MiMo pricing has CNY");
assert_eq!(flash_cny.input_cache_hit_per_million, 0.02);
assert_eq!(flash_cny.input_cache_miss_per_million, 1.0);
assert_eq!(flash_cny.output_per_million, 2.0);
}

#[test]
Expand Down
69 changes: 64 additions & 5 deletions crates/tui/src/tui/app.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2609,7 +2609,7 @@ impl App {

/// Read the visible session+sub-agent cost in the chosen currency.
pub fn displayed_session_cost_for_currency(&self, currency: CostCurrency) -> f64 {
match currency {
match self.cost_display_currency(currency) {
CostCurrency::Usd => {
let current = self.session.session_cost + self.session.subagent_cost;
current.max(self.session.displayed_cost_high_water)
Expand All @@ -2622,25 +2622,43 @@ impl App {
}

pub fn session_cost_for_currency(&self, currency: CostCurrency) -> f64 {
match currency {
match self.cost_display_currency(currency) {
CostCurrency::Usd => self.session.session_cost,
CostCurrency::Cny => self.session.session_cost_cny,
}
}

pub fn subagent_cost_for_currency(&self, currency: CostCurrency) -> f64 {
match currency {
match self.cost_display_currency(currency) {
CostCurrency::Usd => self.session.subagent_cost,
CostCurrency::Cny => self.session.subagent_cost_cny,
}
}

pub fn format_cost_amount(&self, amount: f64) -> String {
crate::pricing::format_cost_amount(amount, self.cost_currency)
crate::pricing::format_cost_amount(amount, self.cost_display_currency(self.cost_currency))
}

pub fn format_cost_amount_precise(&self, amount: f64) -> String {
crate::pricing::format_cost_amount_precise(amount, self.cost_currency)
crate::pricing::format_cost_amount_precise(
amount,
self.cost_display_currency(self.cost_currency),
)
}

/// Pick the currency that cost figures should actually be rendered in.
///
/// A request for CNY is downgraded to USD when every tracked CNY total
/// (session, sub-agent, displayed high-water mark) is exactly zero while at
/// least one of the USD counterparts is positive. Any other request is
/// returned unchanged.
fn cost_display_currency(&self, currency: CostCurrency) -> CostCurrency {
    // Only a CNY request can ever be redirected.
    if currency != CostCurrency::Cny {
        return currency;
    }

    let session = &self.session;
    let no_cny_accrued = session.session_cost_cny == 0.0
        && session.subagent_cost_cny == 0.0
        && session.displayed_cost_high_water_cny == 0.0;
    let some_usd_accrued = session.session_cost > 0.0
        || session.subagent_cost > 0.0
        || session.displayed_cost_high_water > 0.0;

    if no_cny_accrued && some_usd_accrued {
        CostCurrency::Usd
    } else {
        currency
    }
}

/// Estimated cost saved by the last turn's cache-hit tokens in the
Expand All @@ -2651,6 +2669,9 @@ impl App {
let estimate = crate::pricing::calculate_cache_savings(&self.model, hit_tokens)?;
Some(match self.cost_currency {
crate::pricing::CostCurrency::Usd => estimate.usd,
crate::pricing::CostCurrency::Cny if estimate.cny == 0.0 && estimate.usd > 0.0 => {
estimate.usd
}
crate::pricing::CostCurrency::Cny => estimate.cny,
})
Comment on lines 2670 to 2676

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

When `self.cost_currency` is `Cny` but the model is USD-only (meaning `estimate.cny == 0.0` and `estimate.usd > 0.0`), returning `estimate.usd` directly can cause a formatting bug. If the app's active display currency has been resolved to `Cny` (due to previously accrued CNY costs in the session), `format_cost_amount` will format this returned USD value with the CNY symbol (¥), displaying an incorrect and misleading value (e.g., ¥0.34 instead of $0.34). Matching against `self.cost_display_currency(self.cost_currency)` ensures that we only return USD savings when the display currency is actually resolved to USD.

        Some(match self.cost_display_currency(self.cost_currency) {
            crate::pricing::CostCurrency::Usd => estimate.usd,
            crate::pricing::CostCurrency::Cny => estimate.cny,
        })

}
Expand Down Expand Up @@ -5514,6 +5535,44 @@ mod tests {
assert_eq!(app.compact_threshold, 209_715);
}

#[test]
fn cny_display_falls_back_to_usd_for_usd_only_costs() {
    let options = test_options(false);
    let config = Config::default();
    let mut app = App::new(options, &config);
    app.cost_currency = CostCurrency::Cny;

    // Accrue a cost that has a USD amount only.
    let usd_only = CostEstimate::usd_only(0.42);
    app.accrue_session_cost_estimate(usd_only);

    // Asking for CNY must surface the USD figure, formatted with a dollar sign.
    let displayed = app.displayed_session_cost_for_currency(CostCurrency::Cny);
    assert_eq!(displayed, 0.42);

    let session_cost = app.session_cost_for_currency(CostCurrency::Cny);
    assert_eq!(session_cost, 0.42);

    let rendered = app.format_cost_amount(displayed);
    assert_eq!(rendered, "$0.42");
}

#[test]
fn cny_display_keeps_cny_when_costs_have_cny_rates() {
    let options = test_options(false);
    let config = Config::default();
    let mut app = App::new(options, &config);
    app.cost_currency = CostCurrency::Cny;

    // Accrue a cost that carries both a USD and a CNY amount.
    let dual_currency = CostEstimate {
        cny: 2.5,
        usd: 0.42,
    };
    app.accrue_session_cost_estimate(dual_currency);

    // With a non-zero CNY total the CNY figure and symbol are kept.
    let displayed = app.displayed_session_cost_for_currency(CostCurrency::Cny);
    assert_eq!(displayed, 2.5);

    let rendered = app.format_cost_amount(displayed);
    assert_eq!(rendered, "¥2.50");
}

#[test]
fn cny_cache_savings_falls_back_to_usd_for_usd_only_models() {
    let options = test_options(false);
    let config = Config::default();
    let mut app = App::new(options, &config);
    app.cost_currency = CostCurrency::Cny;
    app.model = String::from("kimi-k2.6");
    app.session.last_prompt_cache_hit_tokens = Some(1_000_000);

    // With CNY selected and a USD-only model, the USD savings are reported.
    let savings = app.last_turn_cache_savings();
    assert_eq!(savings, Some(0.34));
}

#[test]
fn sidebar_focus_accepts_work_and_maps_legacy_trackers_to_work() {
assert_eq!(SidebarFocus::from_setting("auto"), SidebarFocus::Auto);
Expand Down
Loading
Loading