From 28a14c09b106a0174b6aba114251f3c42012aa53 Mon Sep 17 00:00:00 2001 From: Nigel Jones Date: Wed, 18 Mar 2026 18:05:27 +0000 Subject: [PATCH] fix: always populate mot.usage in HuggingFace backend (#694) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Token count extraction in _post_process_async was gated behind `span is not None or metrics_enabled`, so mot.usage was never populated in plain (non-telemetry) runs. Now extracted unconditionally — usage is a standard mot field, not a telemetry concern. --- mellea/backends/huggingface.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/mellea/backends/huggingface.py b/mellea/backends/huggingface.py index 424d5b2f3..e6236e5c6 100644 --- a/mellea/backends/huggingface.py +++ b/mellea/backends/huggingface.py @@ -1133,18 +1133,11 @@ class used during generation, if any. ) span = mot._meta.get("_telemetry_span") - from ..telemetry.metrics import is_metrics_enabled - metrics_enabled = is_metrics_enabled() - - # Extract token counts only if needed + # Derive token counts from the output sequences (HF models have no usage object). hf_output = mot._meta.get("hf_output") n_prompt, n_completion = None, None - if (span is not None or metrics_enabled) and isinstance( - hf_output, GenerateDecoderOnlyOutput - ): - # HuggingFace local models don't provide usage objects, but we can - # calculate token counts from sequences + if isinstance(hf_output, GenerateDecoderOnlyOutput): try: if input_ids is not None and hf_output.sequences is not None: n_prompt = input_ids.shape[1] @@ -1152,7 +1145,6 @@ class used during generation, if any. except Exception: pass - # Populate standardized usage field (convert to OpenAI format) if n_prompt is not None and n_completion is not None: mot.usage = { "prompt_tokens": n_prompt,