Skip to content
5 changes: 5 additions & 0 deletions lm_service/stats_loggers.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,7 @@ async def build_metrics_msg(self) -> dict[str, str]:
"Avg e2e time requests: %.3f ms, "
"Avg queue time requests: %.3f ms, "
"Avg prefill time requests: %.3f ms, "
"Avg preprocess time requests: %.3f ms, "
"Avg mean time per output token requests: %.3f ms, "
"Avg time to first token: %.3f ms, "
"Avg proxy ttft: %.3f ms, "
Expand All @@ -312,6 +313,10 @@ async def build_metrics_msg(self) -> dict[str, str]:
value.get("e2e_time_requests", 0.0),
value.get("queue_time_requests", 0.0),
value.get("prefill_time_requests", 0.0),
# preprocess time = ttft - queue - prefill
value.get("time_to_first_token", 0.0)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

能不能不要用相减的方式来推算,而是直接增加打点,把 preprocess 时间单独统计出来?

- value.get("queue_time_requests", 0.0)
- value.get("prefill_time_requests", 0.0),
value.get("mean_time_per_output_token_requests", 0.0),
value.get("time_to_first_token", 0.0)
if self.has_d_instance()
Expand Down
Loading