From 706de02317625fa3477f5379e31caaec0b174453 Mon Sep 17 00:00:00 2001
From: linfeng-yuan <1102311262@qq.com>
Date: Tue, 10 Jun 2025 08:39:24 +0800
Subject: [PATCH] [fix] fix compatibility for non-EPLB scenarios (#1142)

### What this PR does / why we need it?
Fix incompatibility problem for non-EPLB scenarios in #1116

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
Tested with online serving and e2e CI.

Signed-off-by: linfeng-yuan <1102311262@qq.com>
---
 vllm_ascend/quantization/w8a8_dynamic.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/vllm_ascend/quantization/w8a8_dynamic.py b/vllm_ascend/quantization/w8a8_dynamic.py
index 2b7d57c..c6e863f 100644
--- a/vllm_ascend/quantization/w8a8_dynamic.py
+++ b/vllm_ascend/quantization/w8a8_dynamic.py
@@ -150,8 +150,8 @@ def fused_experts_with_mc2(hidden_states: torch.Tensor,
                            log2phy: torch.Tensor = None,
                            global_redundant_expert_num: int = 0,
                            **kwargs) -> torch.Tensor:
-
-    topk_ids = log2phy[topk_ids]
+    if log2phy is not None:
+        topk_ids = log2phy[topk_ids]
     global_bs = 0
     moe_expert_num = len(expert_map) + global_redundant_expert_num
     # hidden_states = hidden_states.bfloat16()
@@ -278,7 +278,8 @@ def fused_experts_with_all2all(
         log2phy: torch.Tensor = None,
         global_redundant_expert_num: int = 0,
 ):
-    topk_ids = log2phy[topk_ids]
+    if log2phy is not None:
+        topk_ids = log2phy[topk_ids]
     original_shape = hidden_states.shape
     if len(original_shape) == 3:
         hidden_states = hidden_states.view(-1, hidden_states.shape[-1])