diff --git a/vllm_ascend/platform.py b/vllm_ascend/platform.py index faed5aea..dd063e28 100644 --- a/vllm_ascend/platform.py +++ b/vllm_ascend/platform.py @@ -310,7 +310,10 @@ class NPUPlatform(Platform): if parallel_config and parallel_config.worker_cls == "auto": # TODO: this is a tricky way to disable `use_sequence_parallel_moe` in vllm. - os.environ["VLLM_ALL2ALL_BACKEND"] = "flashinfer_all2allv" + if vllm_version_is("0.11.0"): + os.environ["VLLM_ALL2ALL_BACKEND"] = "flashinfer_all2allv" + else: + parallel_config.all2all_backend = "flashinfer_all2allv" if ascend_config.torchair_graph_config.enabled or ascend_config.enable_shared_expert_dp: parallel_config.worker_cls = "vllm_ascend.torchair.torchair_worker.NPUTorchairWorker" else: