fix: always populate mot.usage in HuggingFace backend (#694)

planetf1 · planetf1 · commit f1f909d2a7fd · 2026-03-18T18:26:44.000Z
Token count extraction in _post_process_async was gated behind
`span is not None or metrics_enabled`, so mot.usage was never
populated in plain (non-telemetry) runs. Token counts are now extracted
whenever hf_output is a GenerateDecoderOnlyOutput, regardless of
telemetry or metrics settings — usage is a standard mot field, not a
telemetry concern.
diff --git a/mellea/backends/huggingface.py b/mellea/backends/huggingface.py
@@ -1133,16 +1133,12 @@ class used during generation, if any.
         )
 
         span = mot._meta.get("_telemetry_span")
-        from ..telemetry.metrics import is_metrics_enabled
 
-        metrics_enabled = is_metrics_enabled()
-
-        # Extract token counts only if needed
+        # Extract token counts from the HF output sequences.
+        # Always computed (usage is a standard mot field, not a telemetry concern).
         hf_output = mot._meta.get("hf_output")
         n_prompt, n_completion = None, None
-        if (span is not None or metrics_enabled) and isinstance(
-            hf_output, GenerateDecoderOnlyOutput
-        ):
+        if isinstance(hf_output, GenerateDecoderOnlyOutput):
             # HuggingFace local models don't provide usage objects, but we can
             # calculate token counts from sequences
             try: