@@ -739,6 +739,9 @@ def get_hccl_config_for_pg_options(group_name: str) -> Optional[dict]:
739739 "dp" : {
740740 "hccl_buffer_size" : calculate_dp_buffer_size ()
741741 },
742+ "ep" : {
743+ "hccl_buffer_size" : calculate_ep_buffer_size ()
744+ },
742745 }
743746 return hccl_config_map .get (group_name , get_default_buffer_config ())
744747
@@ -760,6 +763,25 @@ def calculate_dp_buffer_size() -> int:
760763 return max (dp_buffer_size , _MIN_DP_BUFFER_SIZE )
761764
762765
def calculate_ep_buffer_size() -> int:
    """Return the HCCL buffer size (in MB) for the expert-parallel ("ep") group.

    Sizing formula (bytes): batch_size * hidden_size * topk * 4,
    where the 4 bytes per element come from two int8 buffers plus one
    bf16 buffer (int8_size * 2 + bf16_size = 1*2 + 2 = 4). The result is
    rounded up to whole MB and floored at ``_DEFAULT_BUFFER_SIZE``.

    Returns:
        Buffer size in MB, at least ``_DEFAULT_BUFFER_SIZE``.
    """
    # Function-scope import: vllm config is only available at runtime and a
    # top-level import could create a circular dependency.
    from vllm.config import get_current_vllm_config
    vllm_config = get_current_vllm_config()
    hf_config = vllm_config.model_config.hf_config

    hidden_size = hf_config.hidden_size
    # HF MoE configs (Mixtral/Qwen-MoE/DeepSeek) name this attribute
    # "num_experts_per_tok"; the original "num_experts_per_token" spelling is
    # kept as a fallback for safety, with 1 as the final default for
    # non-MoE models.
    topk = getattr(
        hf_config, "num_experts_per_tok",
        getattr(hf_config, "num_experts_per_token", 1))
    batch_size = vllm_config.scheduler_config.max_num_batched_tokens

    int8_size = torch.iinfo(torch.int8).bits // 8
    bf16_size = torch.finfo(torch.bfloat16).bits // 8
    # Convert bytes -> MB, rounding up so the buffer is never undersized.
    ep_buffer_size = math.ceil((batch_size * hidden_size * topk *
                                (int8_size * 2 + bf16_size)) / (1024 * 1024))
    return max(ep_buffer_size, _DEFAULT_BUFFER_SIZE)
783+
784+
763785# Currently, when in A2, setting the environment variables HCCL_INTRA_PCIE_ENABLE=1
764786# and HCCL_INTRA_ROCE_ENABLE=0 can reduce cross-machine communication traffic and
765787# significantly improve communication performance of MC2 ops dispatch/combine.
0 commit comments