Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,18 @@ def infer_loop(self):
run_way = self.control_state_machine.select_run_way(prefill_reqs=prefill_reqs, decode_reqs=decode_reqs)

if run_way.is_prefill():
# Synchronize the streams once to guarantee that the operator work issued in
# _try_read_new_reqs has completed, so subsequent inference steps cannot read
# potentially invalid data from GPU memory.
g_infer_context.get_overlap_stream().wait_stream(torch.cuda.current_stream())
self.prefill(
event_pack=event_pack,
prefill_reqs=prefill_reqs,
)
continue
elif run_way.is_decode():
# Synchronize the streams once to guarantee that the operator work issued in
# _try_read_new_reqs has completed, so subsequent inference steps cannot read
# potentially invalid data from GPU memory.
g_infer_context.get_overlap_stream().wait_stream(torch.cuda.current_stream())
self.decode(
event_pack=event_pack,
decode_reqs=decode_reqs,
Expand Down
15 changes: 15 additions & 0 deletions test/benchmark/static_inference/profile_demo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import torch
import numpy as np
from torch.profiler import profile, record_function, ProfilerActivity

torch.cuda.synchronize()
with profile(
activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
record_shapes=False,
profile_memory=False,
on_trace_ready=torch.profiler.tensorboard_trace_handler("./log/"),
) as prof:
Comment on lines +1 to +11

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The torch.profiler.tensorboard_trace_handler will write trace files to the ./log/ directory. If this directory does not exist, the script will fail with a FileNotFoundError. It's good practice to ensure the directory exists before using it. Using a variable for the log directory also improves readability and maintainability.

Suggested change
import torch
import numpy as np
from torch.profiler import profile, record_function, ProfilerActivity
torch.cuda.synchronize()
with profile(
activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
record_shapes=False,
profile_memory=False,
on_trace_ready=torch.profiler.tensorboard_trace_handler("./log/"),
) as prof:
import torch
import numpy as np
from torch.profiler import profile, record_function, ProfilerActivity
import os
log_dir = "./log/"
os.makedirs(log_dir, exist_ok=True)
torch.cuda.synchronize()
with profile(
activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
record_shapes=False,
profile_memory=False,
on_trace_ready=torch.profiler.tensorboard_trace_handler(log_dir),
) as prof:

# TODO: replace this placeholder with the CUDA code to be profiled; it must
# execute inside the `with profile(...)` context for events to be captured.
# test cuda code
pass

# Summarize the recorded operators, sorted by total CUDA time, top 20 rows.
print(prof.key_averages().table(sort_by="cuda_time_total", row_limit=20))