[Bugfix][DispatchFFNCombine] resolve vec error caused by unaligned UB access (#6707)

### What this PR does / why we need it? 1. Fix a vec error caused by unaligned UB access in the DispatchFFNCombine op; 2. Fix the expert_token_nums tensor being allocated on the host instead of on the NPU in moe_comm_method.py; 3. Fix the multi-core copy issue of expert_token_nums in the DispatchFFNCombine op (a single aiv copy is sufficient). ### Does this PR introduce _any_ user-facing change? No, this PR does not introduce any user-facing changes. The fix only addresses internal memory access logic and does not modify any public APIs, interfaces, or user-visible behaviors. ### How was this patch tested? Tested with `export VLLM_ASCEND_ENABLE_FUSED_MC2=1`. - vLLM version: v0.15.0 - vLLM main: 9562912cea Signed-off-by: xulei_ict <xulei292@huawei.com> Co-authored-by: xulei_ict <xulei292@huawei.com>
2026-02-14 10:32:50 +08:00
parent e2175d9c7e
commit 1e77077788
3 changed files with 19 additions and 12 deletions
--- a/vllm_ascend/ops/fused_moe/moe_comm_method.py
+++ b/vllm_ascend/ops/fused_moe/moe_comm_method.py
@@ -277,6 +277,13 @@ class FusedMC2CommImpl(MoECommMethod):
    Communication and Computation parallelism on Ascend devices.
    """

+    def __init__(self, moe_config):
+        super().__init__(moe_config)
+        if envs_ascend.VLLM_ASCEND_ENABLE_FUSED_MC2 == 1:
+            self.expert_token_nums = torch.zeros([self.moe_config.num_local_experts], dtype=torch.int32, device="npu")
+        else:
+            self.expert_token_nums = None
+
    def _get_token_dispatcher(self):
        return TokenDispatcherWithMC2()

@@ -325,7 +332,6 @@ class FusedMC2CommImpl(MoECommMethod):
        expert_tokens = None
        if envs_ascend.VLLM_ASCEND_ENABLE_FUSED_MC2 == 1:
            out = torch.empty_like(hidden_states)
-            expert_token_nums = torch.zeros([self.moe_config.num_local_experts], dtype=torch.int32)
            torch.ops._C_ascend.dispatch_ffn_combine(  # type: ignore
                x=hidden_states,
                weight1=w1,
@@ -337,9 +343,9 @@ class FusedMC2CommImpl(MoECommMethod):
                group=self.token_dispatcher.moe_all_to_all_group_name,
                max_output_size=65536,
                out=out,
-                expert_token_nums=expert_token_nums,
+                expert_token_nums=self.expert_token_nums,
            )
-            expert_tokens = expert_token_nums
+            expert_tokens = self.expert_token_nums
        elif envs_ascend.VLLM_ASCEND_ENABLE_FUSED_MC2 == 2:
            assert expert_map is not None, "expert_map cannot be None."
            group_list_type = 1