Commit 810297d

[bugfix] the hccl_buffsize configuration for EP
Signed-off-by: mojave2 <[email protected]>
1 parent cd58a64 · commit 810297d

File tree: 1 file changed (+22, −0)

vllm_ascend/utils.py

Lines changed: 22 additions & 0 deletions

@@ -739,6 +739,9 @@ def get_hccl_config_for_pg_options(group_name: str) -> Optional[dict]:
         "dp": {
             "hccl_buffer_size": calculate_dp_buffer_size()
         },
+        "ep": {
+            "hccl_buffer_size": calculate_ep_buffer_size()
+        },
     }
     return hccl_config_map.get(group_name, get_default_buffer_config())
 
@@ -760,6 +763,25 @@ def calculate_dp_buffer_size() -> int:
     return max(dp_buffer_size, _MIN_DP_BUFFER_SIZE)
 
 
+def calculate_ep_buffer_size() -> int:
+    """
+    formula of ep buffer size:
+    batch_size * hidden_size * topk * 4
+    """
+    from vllm.config import get_current_vllm_config
+    vllm_config = get_current_vllm_config()
+    hf_config = vllm_config.model_config.hf_config
+
+    hidden_size = hf_config.hidden_size
+    topk = getattr(hf_config, "num_experts_per_token", 1)
+    batch_size = vllm_config.scheduler_config.max_num_batched_tokens
+    int8_size = torch.iinfo(torch.int8).bits // 8
+    bf16_size = torch.finfo(torch.bfloat16).bits // 8
+    ep_buffer_size = math.ceil((batch_size * hidden_size * topk *
+                                (int8_size * 2 + bf16_size)) / (1024 * 1024))
+    return max(ep_buffer_size, _DEFAULT_BUFFER_SIZE)
+
+
 # Currently, when in A2, setting the environment variables HCCL_INTRA_PCIE_ENABLE=1
 # and HCCL_INTRA_ROCE_ENABLE=0 can reduce cross-machine communication traffic and
 # significantly improve communication performance of MC2 ops dispatch/combine.
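As a sanity check on the new sizing formula, here is a standalone sketch of the same arithmetic with hypothetical model dimensions (hidden_size=7168, topk=8, max_num_batched_tokens=4096 are illustrative values, not taken from the commit). The (int8_size * 2 + bf16_size) factor is the "* 4" bytes per element from the docstring, and the result is in MB:

import math

# Hypothetical model/scheduler values (illustration only, not from the commit).
hidden_size = 7168   # hf_config.hidden_size
topk = 8             # hf_config.num_experts_per_token
batch_size = 4096    # scheduler_config.max_num_batched_tokens

# Two int8 buffers plus one bf16 buffer per element: 1*2 + 2 = 4 bytes,
# matching the docstring's "batch_size * hidden_size * topk * 4".
int8_size = 1
bf16_size = 2
bytes_per_elem = int8_size * 2 + bf16_size  # == 4

ep_buffer_mb = math.ceil(
    batch_size * hidden_size * topk * bytes_per_elem / (1024 * 1024))
print(ep_buffer_mb)  # 896 (MB) for these hypothetical dimensions

With this change, get_hccl_config_for_pg_options("ep") returns {"hccl_buffer_size": calculate_ep_buffer_size()} instead of falling through to get_default_buffer_config(), so the EP process group gets a buffer sized for its dispatch/combine traffic rather than the default.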
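The trailing context comment also documents an A2-specific tuning. As a minimal sketch (not part of the commit), these HCCL variables would typically be exported before the communication groups are initialized, e.g. in a launcher script:

import os

# Hypothetical launcher snippet: prefer the intra-node PCIe path over RoCE
# on A2 hardware to reduce cross-machine traffic for MC2 dispatch/combine,
# per the comment in vllm_ascend/utils.py above.
os.environ.setdefault("HCCL_INTRA_PCIE_ENABLE", "1")
os.environ.setdefault("HCCL_INTRA_ROCE_ENABLE", "0")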
