From da1cd9c7ca149b0bdcec2ad0778b467c3f67ed7c Mon Sep 17 00:00:00 2001
From: weiguihua2
Date: Mon, 17 Nov 2025 22:55:17 +0800
Subject: [PATCH] [Bugfix] Fix MoE error when SP chunks the hidden_states
 (#4212)

### What this PR does / why we need it?
Fixes a MoE error that occurs when sequence parallelism (SP) chunks the
hidden_states, by disabling SP in a hacky way.

- vLLM version: v0.11.0
- vLLM main: https://github.com/vllm-project/vllm/commit/2918c1b49c88c29783c86f78d2c4221cb9622379

---------

Signed-off-by: weiguihua2
---
 vllm_ascend/platform.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/vllm_ascend/platform.py b/vllm_ascend/platform.py
index faed5aea..dd063e28 100644
--- a/vllm_ascend/platform.py
+++ b/vllm_ascend/platform.py
@@ -310,7 +310,10 @@ class NPUPlatform(Platform):
 
         if parallel_config and parallel_config.worker_cls == "auto":
             # TODO: this is a tricky way to disable `use_sequence_parallel_moe` in vllm.
-            os.environ["VLLM_ALL2ALL_BACKEND"] = "flashinfer_all2allv"
+            if vllm_version_is("0.11.0"):
+                os.environ["VLLM_ALL2ALL_BACKEND"] = "flashinfer_all2allv"
+            else:
+                parallel_config.all2all_backend = "flashinfer_all2allv"
             if ascend_config.torchair_graph_config.enabled or ascend_config.enable_shared_expert_dp:
                 parallel_config.worker_cls = "vllm_ascend.torchair.torchair_worker.NPUTorchairWorker"
             else:
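For readers applying the same workaround elsewhere, here is a minimal, self-contained sketch of the version-gated override the hunk above introduces. It is a sketch under assumptions: `vllm_version_is` stands in for the helper in `vllm_ascend.utils` (taken here to compare against the installed vLLM release), and `FakeParallelConfig` is a hypothetical substitute for vLLM's real `ParallelConfig`, used only so the snippet runs without vLLM installed.

```python
import os


def vllm_version_is(version: str, installed: str = "0.11.0") -> bool:
    # Hypothetical stand-in for vllm_ascend.utils.vllm_version_is,
    # which checks the installed vLLM release string.
    return installed == version


class FakeParallelConfig:
    # Hypothetical stand-in for vLLM's ParallelConfig; per this patch,
    # releases after v0.11.0 carry an `all2all_backend` field.
    all2all_backend: str = "allgather_reducescatter"  # assumed default


def disable_sequence_parallel_moe(parallel_config) -> None:
    """Route MoE all2all through flashinfer_all2allv so vLLM's
    `use_sequence_parallel_moe` path is never taken (the hack
    this patch applies)."""
    if vllm_version_is("0.11.0"):
        # v0.11.0 still reads the backend from the environment.
        os.environ["VLLM_ALL2ALL_BACKEND"] = "flashinfer_all2allv"
    else:
        # Newer vLLM carries the backend on the parallel config itself,
        # so mutating the env var after config creation would not help.
        parallel_config.all2all_backend = "flashinfer_all2allv"


cfg = FakeParallelConfig()
disable_sequence_parallel_moe(cfg)
print(os.environ.get("VLLM_ALL2ALL_BACKEND"), cfg.all2all_backend)
```

The design choice is deliberately conservative: rather than touching vLLM's SP logic, the patch selects an all2all backend for which vLLM itself turns off `use_sequence_parallel_moe`, which is why the in-code TODO still calls this a tricky workaround.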