Skip to content

Commit

Permalink
Accurately capture the time of loading weights
Browse files (browse the repository at this point in the history)
Signed-off-by: Jun Duan <[email protected]>
  • Loading branch information
waltforme committed Feb 28, 2025
1 parent 4be4b26 commit cbe98bf
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 1 deletion.
11 changes: 11 additions & 0 deletions vllm/model_executor/model_loader/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import itertools
import math
import os
import time
import warnings
from abc import ABC, abstractmethod
from contextlib import contextmanager
Expand Down Expand Up @@ -216,6 +217,9 @@ class Source:
allow_patterns_overrides: Optional[list[str]] = None
"""If defined, weights will load exclusively using these patterns."""

counter_before_loading_weights: float = 0.0
counter_after_loading_weights: float = 0.0

def __init__(self, load_config: LoadConfig):
super().__init__(load_config)
if load_config.model_loader_extra_config:
Expand Down Expand Up @@ -364,6 +368,8 @@ def _xla_weights_iterator(iterator: Generator):

weights_iterator = _xla_weights_iterator(weights_iterator)

if self.counter_before_loading_weights == 0.0:
self.counter_before_loading_weights = time.perf_counter()
# Apply the prefix.
return ((source.prefix + name, tensor)
for (name, tensor) in weights_iterator)
Expand Down Expand Up @@ -408,6 +414,11 @@ def load_model(self, vllm_config: VllmConfig) -> nn.Module:
weights_to_load = {name for name, _ in model.named_parameters()}
loaded_weights = model.load_weights(
self._get_all_weights(model_config, model))
self.counter_after_loading_weights = time.perf_counter()
logger.info(
"Loading weights took %.2f seconds",
self.counter_after_loading_weights -
self.counter_before_loading_weights)
# We only enable strict check for non-quantized models
# that have loaded weights tracking currently.
if model_config.quantization is None and loaded_weights is not None:
Expand Down
2 changes: 1 addition & 1 deletion vllm/v1/worker/gpu_model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -1063,7 +1063,7 @@ def load_model(self) -> None:
self.device)
time_after_load = time.perf_counter()
self.model_memory_usage = m.consumed_memory
- logger.info("Loading model weights took %.4f GB and %.6f seconds",
+ logger.info("Loading model took %.4f GB and %.6f seconds",
self.model_memory_usage / float(2**30),
time_after_load - time_before_load)

Expand Down

0 comments on commit cbe98bf

Please sign in to comment.