[BugFix] Fix moe load problems in torchair when using dynamic eplb (#3381)
### What this PR does / why we need it?
When using dynamic EPLB, the MoE expert load (`moe_load`) is never collected in the torchair path. We fix this by changing how the hidden-states return value from the experts is handled in torchair.

### Does this PR introduce _any_ user-facing change?

### How was this patch tested?
Tested with DeepseekV3 on A3.

- vLLM version: v0.11.0rc3
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0

---------

Signed-off-by: daishixun <dsxsteven@sina.com>
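For context, the fix hinges on a tuple-length contract for the experts' return value: with shared experts the MoE call returns a 2-tuple `(e_hidden_states, shared_hidden_states)`, while with dynamic EPLB it returns a 3-tuple `(e_hidden_states, group_list_type, expert_tokens)`. Below is a minimal sketch of that contract; `experts_forward` is a hypothetical stand-in, not the actual vllm-ascend API:

```python
from typing import Tuple, Union

import torch


def experts_forward(
        hidden_states: torch.Tensor,
        dynamic_eplb: bool = False,
        shared_experts: bool = False) -> Union[torch.Tensor, Tuple]:
    """Illustrative stand-in for the torchair fused-MoE experts call."""
    out = hidden_states  # placeholder for the real expert computation
    if dynamic_eplb:
        group_list_type = 1  # assumption: 1 = per-expert counts, 0 = cumulative
        expert_tokens = torch.tensor([3, 0, 5])  # tokens routed to each expert
        return out, group_list_type, expert_tokens
    if shared_experts:
        return out, hidden_states  # second slot: shared-expert output
    return out


# Caller-side disambiguation by tuple length, mirroring the patched hunk below:
e_hidden_states = experts_forward(torch.ones(4, 8), dynamic_eplb=True)
if isinstance(e_hidden_states, tuple) and len(e_hidden_states) == 2:
    e_hidden_states, shared_hidden_states = e_hidden_states
if isinstance(e_hidden_states, tuple) and len(e_hidden_states) == 3:
    e_hidden_states, group_list_type, expert_tokens = e_hidden_states
```

Checking `len(...)` rather than only `isinstance(..., tuple)` is what lets the shared-experts branch and the dynamic-EPLB branch coexist without misinterpreting each other's return shape.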
```diff
@@ -1279,13 +1279,15 @@ class TorchairAscendFusedMoE(FusedMoE):
         )

         if shared_experts:
-            if isinstance(e_hidden_states, tuple):
+            if isinstance(e_hidden_states,
+                          tuple) and len(e_hidden_states) == 2:
                 e_hidden_states, shared_hidden_states = e_hidden_states

         if self.dynamic_eplb and isinstance(
                 e_hidden_states, tuple) and len(e_hidden_states) == 3:
-            self.moe_load += e_hidden_states[2] if e_hidden_states[1] == 0 else \
-                torch.cat(e_hidden_states[2][:1], e_hidden_states[2][1:] - e_hidden_states[2][:-1])
+            e_hidden_states, group_list_type, expert_tokens = e_hidden_states
+            self.moe_load += expert_tokens if group_list_type else \
+                torch.cat([expert_tokens[:1], expert_tokens[1:] - expert_tokens[:-1]])

         if (fused_moe_state not in [
                 FusedMoEState.AllGather, FusedMoEState.AllGatherEP,
```
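One note on the load-accounting expression in the new code: when `group_list_type` is 0 the kernel reports *cumulative* token counts per expert, and differencing adjacent entries recovers per-expert counts. The pre-patch line also passed two tensors positionally to `torch.cat`, whose first parameter must be a sequence of tensors (the second positional parameter is `dim`), so that path would have raised an error. A self-contained illustration with made-up counts:

```python
import torch

# Suppose the kernel returns cumulative token counts (group_list_type == 0):
# expert 0 received 4 tokens, expert 1 received 2, expert 2 received 5.
cumulative = torch.tensor([4, 6, 11])

# Differencing adjacent entries recovers per-expert counts, matching the
# patched expression torch.cat([t[:1], t[1:] - t[:-1]]).
per_expert = torch.cat([cumulative[:1], cumulative[1:] - cumulative[:-1]])
assert per_expert.tolist() == [4, 2, 5]

# With group_list_type != 0 the counts are already per-expert, so they are
# accumulated into self.moe_load directly.
```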