From 050d202bb97db481db2db3c142f5eb08608e3c7a Mon Sep 17 00:00:00 2001 From: Mengqing Cao Date: Mon, 29 Sep 2025 09:12:49 +0800 Subject: [PATCH] [Quickfix] Fix dp+ep+tp error when sp chunked the hidden_states (#3246) ### What this PR does / why we need it? Fix the dp+ep+tp in-place copy error that occurs when sp chunks the `hidden_states`. ### How was this patch tested? Tested locally with the following script: ```bash python examples/offline_data_parallel.py \ --model="Qwen/Qwen3-30B-A3B" \ --dp-size=2 \ --tp-size=2 \ --enable-expert-parallel ``` Signed-off-by: MengqingCao --- vllm_ascend/ops/fused_moe.py | 1 + vllm_ascend/platform.py | 3 +++ 2 files changed, 4 insertions(+) diff --git a/vllm_ascend/ops/fused_moe.py b/vllm_ascend/ops/fused_moe.py index 533c20b..97489f9 100644 --- a/vllm_ascend/ops/fused_moe.py +++ b/vllm_ascend/ops/fused_moe.py @@ -295,6 +295,7 @@ class AscendFusedMoE(FusedMoE): in_dtype=params_dtype, ) self.moe_config = moe + # TODO: The self.moe_config.tp_size here is not correct, fixme soon if quant_config is None: self.quant_method = AscendUnquantizedFusedMoEMethod(moe) diff --git a/vllm_ascend/platform.py b/vllm_ascend/platform.py index dbfe1dc..1f12c59 100644 --- a/vllm_ascend/platform.py +++ b/vllm_ascend/platform.py @@ -16,6 +16,7 @@ # import gc +import os from datetime import timedelta from typing import TYPE_CHECKING, Optional, Tuple @@ -260,6 +261,8 @@ class NPUPlatform(Platform): compilation_config.level = CompilationLevel.NO_COMPILATION if parallel_config and parallel_config.worker_cls == "auto": + # TODO: this is a tricky way to disable `use_sequence_parallel_moe` in vllm. + os.environ["VLLM_ALL2ALL_BACKEND"] = "flashinfer_all2allv" if ascend_config.torchair_graph_config.enabled or ascend_config.enable_shared_expert_dp: parallel_config.worker_cls = "vllm_ascend.torchair.torchair_worker.NPUTorchairWorker" else: