From 95b4ffbac34cc06fbc94eeae9e06c7b4a2a94afe Mon Sep 17 00:00:00 2001 From: Evrard-Nil Daillet Date: Mon, 30 Mar 2026 20:39:25 -0700 Subject: [PATCH 1/2] fix: classify no-usage-stats log by stream completion state Client disconnects and provider errors are expected to lack usage stats since external providers (OpenAI, Anthropic) only send usage in the final SSE chunk. These now log at WARN level instead of ERROR. Only truly completed streams that lack usage stats remain ERROR, which indicates an actual provider bug worth investigating. Reduces ~12k false-positive ERROR logs/day in prod. --- .gitignore | 1 + crates/services/src/completions/mod.rs | 17 +++++++++++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 11b6a6f39..a20449b7f 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,4 @@ oci.tar .claude .DS_Store secret +repro_*.sh diff --git a/crates/services/src/completions/mod.rs b/crates/services/src/completions/mod.rs index f5b133a36..3a8b6cc3b 100644 --- a/crates/services/src/completions/mod.rs +++ b/crates/services/src/completions/mod.rs @@ -171,11 +171,24 @@ where chat_id.clone(), ), (None, None) => { - tracing::error!(%organization_id, %model_id, "Stream ended but no usage stats and no chat_id available"); + // Distinguish client disconnect / provider error from truly unexpected cases. + // Client disconnects and provider errors are expected — usage is only sent + // in the final chunk, so an interrupted stream will never have it. + if !self.stream_completed { + tracing::warn!(%organization_id, %model_id, stream_error = self.last_error.is_some(), + "Stream interrupted before usage stats or chat_id received (client disconnect or provider error)"); + } else { + tracing::error!(%organization_id, %model_id, "Stream completed but no usage stats and no chat_id available"); + } return; } (None, Some(chat_id)) => { - tracing::error!(%chat_id, %organization_id, %model_id, "Stream ended but no usage stats available"); + if !self.stream_completed { + tracing::warn!(%chat_id, %organization_id, %model_id, stream_error = self.last_error.is_some(), + "Stream interrupted before usage stats received (client disconnect or provider error)"); + } else { + tracing::error!(%chat_id, %organization_id, %model_id, "Stream completed but no usage stats available"); + } return; } (Some(usage), None) => { From 7a86b013d8f0d6e62f7e5771986a989144a7f022 Mon Sep 17 00:00:00 2001 From: Evrard-Nil Daillet Date: Mon, 30 Mar 2026 20:47:43 -0700 Subject: [PATCH 2/2] fix: update stream_completed doc comment per review --- crates/services/src/completions/mod.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/crates/services/src/completions/mod.rs b/crates/services/src/completions/mod.rs index 3a8b6cc3b..3e0ebfba6 100644 --- a/crates/services/src/completions/mod.rs +++ b/crates/services/src/completions/mod.rs @@ -83,8 +83,9 @@ where last_usage_stats: Option, /// Last chat ID from streaming chunks (for attestation and inference_id) last_chat_id: Option, - /// Flag indicating the stream completed normally (received None from inner stream) - /// If false when Drop is called, the client disconnected mid-stream + /// Flag indicating the stream completed normally (received None from inner stream). + /// If false when Drop is called, the stream was interrupted — either the client + /// disconnected mid-stream or the provider returned an error (check `last_error`). stream_completed: bool, /// Response ID when called from Responses API (for usage tracking FK) response_id: Option,