From 28a14c09b106a0174b6aba114251f3c42012aa53 Mon Sep 17 00:00:00 2001
From: Nigel Jones <jonesn@uk.ibm.com>
Date: Wed, 18 Mar 2026 18:05:27 +0000
Subject: [PATCH] fix: always populate mot.usage in HuggingFace backend (#694)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Token count extraction in _post_process_async was gated behind
`span is not None or metrics_enabled`, so mot.usage was never
populated in plain (non-telemetry) runs. Token counts are now extracted
whenever hf_output is a GenerateDecoderOnlyOutput, regardless of
telemetry settings — usage is a standard mot field, not a telemetry
concern.
---
 mellea/backends/huggingface.py | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/mellea/backends/huggingface.py b/mellea/backends/huggingface.py
index 424d5b2f3..e6236e5c6 100644
--- a/mellea/backends/huggingface.py
+++ b/mellea/backends/huggingface.py
@@ -1133,18 +1133,11 @@ class used during generation, if any.
         )
 
         span = mot._meta.get("_telemetry_span")
-        from ..telemetry.metrics import is_metrics_enabled
 
-        metrics_enabled = is_metrics_enabled()
-
-        # Extract token counts only if needed
+        # Derive token counts from the output sequences (HF models have no usage object).
         hf_output = mot._meta.get("hf_output")
         n_prompt, n_completion = None, None
-        if (span is not None or metrics_enabled) and isinstance(
-            hf_output, GenerateDecoderOnlyOutput
-        ):
-            # HuggingFace local models don't provide usage objects, but we can
-            # calculate token counts from sequences
+        if isinstance(hf_output, GenerateDecoderOnlyOutput):
             try:
                 if input_ids is not None and hf_output.sequences is not None:
                     n_prompt = input_ids.shape[1]
@@ -1152,7 +1145,6 @@ class used during generation, if any.
             except Exception:
                 pass
 
-        # Populate standardized usage field (convert to OpenAI format)
         if n_prompt is not None and n_completion is not None:
             mot.usage = {
                 "prompt_tokens": n_prompt,