From 474fa737c865dfaece24aa760e16095d9b05d6d6 Mon Sep 17 00:00:00 2001 From: weijinqian0 <1184188277@qq.com> Date: Tue, 30 Sep 2025 18:45:09 +0800 Subject: [PATCH] [bugfix] Fix moe bug: allgather error. (#3279) It will crash when the DeepSeek model is executed on A2. - vLLM version: v0.11.0rc3 - vLLM main: https://github.com/vllm-project/vllm/commit/releases/v0.11.0 --------- Signed-off-by: weijinqian_v1 Co-authored-by: weijinqian_v1 --- tests/ut/ops/test_token_dispatcher.py | 1 + vllm_ascend/ops/moe/token_dispatcher.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/ut/ops/test_token_dispatcher.py b/tests/ut/ops/test_token_dispatcher.py index cc2d307..ed32b93 100644 --- a/tests/ut/ops/test_token_dispatcher.py +++ b/tests/ut/ops/test_token_dispatcher.py @@ -269,6 +269,7 @@ class TestTokenDispatcherWithAllGather(TestBase): def test_token_combine_with_expert_map(self): self.dispatcher.expert_map = torch.tensor([0, 1, 2, 3]) self.dispatcher.sorted_token_indices = torch.tensor([0, 1, 1, 1, 1, 1]) + self.dispatcher.expanded_row_idx = torch.tensor([0, 1, 1, 1, 1, 1]) self.dispatcher.sorted_weights = torch.tensor( [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]) self.dispatcher.original_shape = (3, 128) diff --git a/vllm_ascend/ops/moe/token_dispatcher.py b/vllm_ascend/ops/moe/token_dispatcher.py index b36cc44..5e17815 100644 --- a/vllm_ascend/ops/moe/token_dispatcher.py +++ b/vllm_ascend/ops/moe/token_dispatcher.py @@ -383,7 +383,7 @@ class TokenDispatcherWithAllGather(MoETokenDispatcher): assert self.original_shape is not None final_hidden_states = torch_npu.npu_moe_token_unpermute( permuted_tokens=hidden_states, - sorted_indices=self.expanded_row_idx, + sorted_indices=torch.abs(self.expanded_row_idx), probs=self.topk_weights) if len(self.original_shape) == 3: final_hidden_states = final_hidden_states.view(self.original_shape)