[Bugfix] Fix MoE error when SP chunks the hidden_states (#4212)
### What this PR does / why we need it?
Fixes a MoE error that occurs when sequence parallelism (SP) chunks the `hidden_states`. The fix disables SP for MoE in a hacky way: the all2all backend is forced to `flashinfer_all2allv` so that vLLM's `use_sequence_parallel_moe` never turns on.
- vLLM version: v0.11.0
- vLLM main: 2918c1b49c
---------
Signed-off-by: weiguihua2 <weiguihua2@huawei.com>
```diff
@@ -310,7 +310,10 @@ class NPUPlatform(Platform):
         if parallel_config and parallel_config.worker_cls == "auto":
             # TODO: this is a tricky way to disable `use_sequence_parallel_moe` in vllm.
-            os.environ["VLLM_ALL2ALL_BACKEND"] = "flashinfer_all2allv"
+            if vllm_version_is("0.11.0"):
+                os.environ["VLLM_ALL2ALL_BACKEND"] = "flashinfer_all2allv"
+            else:
+                parallel_config.all2all_backend = "flashinfer_all2allv"
             if ascend_config.torchair_graph_config.enabled or ascend_config.enable_shared_expert_dp:
                 parallel_config.worker_cls = "vllm_ascend.torchair.torchair_worker.NPUTorchairWorker"
             else:
```