From ee0a95e47f98c98e13f85a37c0ca9515d71f1537 Mon Sep 17 00:00:00 2001 From: zouyida2052 Date: Fri, 10 Oct 2025 23:07:24 +0800 Subject: [PATCH] bugfix for mtp when running torchair in a2 (#3354) ### What this PR does / why we need it? When the op torchair_fused_experts_with_mc2 is called, we need to pass a TP group; currently it is only passed in the quantized scenario, so we also need to pass it in the unquantized scenario. - vLLM version: v0.11.0rc3 - vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0 Signed-off-by: zouyida2052 --- vllm_ascend/torchair/ops/torchair_fused_moe.py | 1 + 1 file changed, 1 insertion(+) diff --git a/vllm_ascend/torchair/ops/torchair_fused_moe.py b/vllm_ascend/torchair/ops/torchair_fused_moe.py index 3e63831..0f0464c 100644 --- a/vllm_ascend/torchair/ops/torchair_fused_moe.py +++ b/vllm_ascend/torchair/ops/torchair_fused_moe.py @@ -899,6 +899,7 @@ class TorchairAscendUnquantizedFusedMoEMethod(UnquantizedFusedMoEMethod): expert_map=expert_map, moe_all_to_all_group_name=self.moe_all_to_all_group_name, shared_experts=shared_experts, + is_torchair=self.torchair_graph_enabled, mc2_mask=kwargs.get("mc2_mask", None)) elif fused_moe_state in [ FusedMoEState.AllGather, FusedMoEState.NaiveMulticast