Skip to content

Commit a05a02f

Browse files
committed
quick fix eplb
Signed-off-by: Che Ruan <[email protected]>
1 parent 96c3623 commit a05a02f

File tree

4 files changed

+22
-10
lines changed

4 files changed

+22
-10
lines changed

vllm_ascend/eplb/adaptor/vllm_adaptor.py

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -194,20 +194,34 @@ def _export_tensor_to_file(self, expert_maps, expert_map_record_path: str):
194194
json.dump(record, f, indent=4)
195195

196196
def do_update_expert_map(self, layer_id, updated_expert_map):
    """Overwrite the stored expert map for ``layer_id`` in place.

    The device-side map ``self.expert_map_per_layer[layer_id]`` may be
    allocated longer than ``updated_expert_map`` (padded expert count), so
    the update is right-padded with ``-1`` (the "no expert" sentinel)
    before the in-place ``copy_``.

    Args:
        layer_id: Key into the per-layer map dictionaries.
        updated_expert_map: 1-D tensor holding the new mapping; must not be
            longer than the stored device-side map.

    Raises:
        ValueError: If ``updated_expert_map`` is longer than the stored
            map. Without this guard, ``F.pad`` with a negative pad would
            silently truncate the tail of the map.
    """
    pad_len = self.expert_map_per_layer[layer_id].shape[0] - updated_expert_map.shape[0]
    if pad_len < 0:
        raise ValueError(
            f"updated expert map for layer {layer_id} has "
            f"{updated_expert_map.shape[0]} entries, which exceeds the "
            f"{self.expert_map_per_layer[layer_id].shape[0]} allocated")
    updated_expert_map_padded = torch.nn.functional.pad(
        updated_expert_map,
        pad=(0, pad_len),
        mode='constant',
        value=-1,
    )
    self.expert_map_per_layer[layer_id].copy_(updated_expert_map_padded)
    # NOTE(review): the CPU mirror receives the *unpadded* map — this
    # assumes expert_map_per_layer_cpu[layer_id] has the unpadded length;
    # confirm against where the CPU mirror is allocated.
    self.expert_map_per_layer_cpu[layer_id].copy_(updated_expert_map)
199206

200207
def do_update_expert_weight(self, layer_id, local_expert_to_replace,
                            buffer_tensor_id):
    """Copy staged weights from a transfer buffer into the expert slot.

    Each parameter tensor of the expert being replaced is overwritten
    in place (``copy_``) from the corresponding staged buffer tensor, so
    existing references to the parameter storage stay valid.
    """
    dst_tensors = self.expert_param_per_layer[layer_id][local_expert_to_replace]
    src_tensors = self.buffer_tensor_list[buffer_tensor_id]
    for dst, src in zip(dst_tensors, src_tensors):
        dst.copy_(src)
        logger.debug(f"Expert tensor shape is :{dst.shape}")
207214

208215
def do_update_log2phy_map(self, layer_id, updated_log2phy_map):
    """Overwrite the stored logical-to-physical map for ``layer_id``.

    A ``None`` entry for the layer means no log2phy map is maintained for
    it, in which case this is a no-op. Otherwise the update is right-padded
    with ``-1`` (the "no expert" sentinel) to the allocated length before
    the in-place ``copy_``.

    Args:
        layer_id: Key into ``self.log2phy_map_per_layer``.
        updated_log2phy_map: 1-D tensor holding the new mapping; must not
            be longer than the stored map.

    Raises:
        ValueError: If ``updated_log2phy_map`` is longer than the stored
            map. Without this guard, ``F.pad`` with a negative pad would
            silently truncate the tail of the map.
    """
    if self.log2phy_map_per_layer[layer_id] is not None:
        pad_len = self.log2phy_map_per_layer[layer_id].shape[0] - updated_log2phy_map.shape[0]
        if pad_len < 0:
            raise ValueError(
                f"updated log2phy map for layer {layer_id} has "
                f"{updated_log2phy_map.shape[0]} entries, which exceeds the "
                f"{self.log2phy_map_per_layer[layer_id].shape[0]} allocated")
        updated_log2phy_map_padded = torch.nn.functional.pad(
            updated_log2phy_map,
            pad=(0, pad_len),
            mode='constant',
            value=-1,
        )
        self.log2phy_map_per_layer[layer_id].copy_(updated_log2phy_map_padded)
211225

212226
def global2local(self, placement: torch.Tensor,
213227
E_local: int) -> torch.Tensor:

vllm_ascend/eplb/core/eplb_device_transfer_loader.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,10 +50,6 @@ def generate_expert_d2d_transfer_task(self, expert_send_info,
5050
)
5151
return
5252

53-
# If neither send nor receive task is needed for this layer on this rank, return
54-
if not (expert_send_info or expert_recv_info):
55-
return
56-
5753
self.updated_expert_map = updated_expert_map
5854

5955
self.layer_id = layer_id

vllm_ascend/ops/moe/moe_mlp.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,14 +105,16 @@ def quant_apply_mlp(hidden_states: torch.Tensor,
105105
group_list=group_list,
106106
output_dtype=torch.int32)[0]
107107
# act_fn: swiglu
108+
group_diff = torch.diff( group_list)
109+
new_group = torch.cat( [ group_list[0].unsqueeze(0), group_diff ],dim=0)
108110
hidden_states, swiglu_out_scale = torch_npu.npu_dequant_swiglu_quant(
109111
x=hidden_states,
110112
weight_scale=w1_scale,
111113
activation_scale=pertoken_scale,
112114
bias=None,
113115
quant_scale=None,
114116
quant_offset=None,
115-
group_index=group_list,
117+
group_index=new_group,
116118
activate_left=True,
117119
quant_mode=1,
118120
)

vllm_ascend/quantization/w8a8_dynamic.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,7 @@ def apply(
249249
return moe_comm_method.fused_experts(
250250
hidden_states=x,
251251
w1=layer.w13_weight,
252-
w1_scale=layer.w13_weight_scale_fp32,
252+
w1_scale=layer.w13_weight_scale.to(torch.float32),
253253
w2=layer.w2_weight,
254254
w2_scale=layer.w2_weight_scale,
255255
topk_weights=topk_weights,

0 commit comments

Comments
 (0)