Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 2 additions & 10 deletions mellea/backends/huggingface.py
Original file line number Diff line number Diff line change
Expand Up @@ -1133,26 +1133,18 @@ class used during generation, if any.
)

span = mot._meta.get("_telemetry_span")
-from ..telemetry.metrics import is_metrics_enabled
-
-metrics_enabled = is_metrics_enabled()
-
-# Extract token counts only if needed
+# Derive token counts from the output sequences (HF models have no usage object).
hf_output = mot._meta.get("hf_output")
n_prompt, n_completion = None, None
-if (span is not None or metrics_enabled) and isinstance(
-hf_output, GenerateDecoderOnlyOutput
-):
-# HuggingFace local models don't provide usage objects, but we can
-# calculate token counts from sequences
+if isinstance(hf_output, GenerateDecoderOnlyOutput):
try:
if input_ids is not None and hf_output.sequences is not None:
n_prompt = input_ids.shape[1]
n_completion = hf_output.sequences[0].shape[0] - n_prompt
except Exception:
pass

# Populate standardized usage field (convert to OpenAI format)
if n_prompt is not None and n_completion is not None:
mot.usage = {
"prompt_tokens": n_prompt,
Expand Down
Loading