[Bugfix] Fix MoE error when SP chunks the hidden_states (#4212)
### What this PR does / why we need it?
Fixes a MoE error that occurs when sequence parallelism (SP) chunks the `hidden_states`. The fix disables SP for MoE in a hacky way: the all2all backend is forced to `flashinfer_all2allv` so that vLLM's `use_sequence_parallel_moe` never turns on.
- vLLM version: v0.11.0
- vLLM main: 2918c1b49c
---------
Signed-off-by: weiguihua2 <weiguihua2@huawei.com>
```diff
@@ -310,7 +310,10 @@ class NPUPlatform(Platform):
         if parallel_config and parallel_config.worker_cls == "auto":
             # TODO: this is a tricky way to disable `use_sequence_parallel_moe` in vllm.
-            os.environ["VLLM_ALL2ALL_BACKEND"] = "flashinfer_all2allv"
+            if vllm_version_is("0.11.0"):
+                os.environ["VLLM_ALL2ALL_BACKEND"] = "flashinfer_all2allv"
+            else:
+                parallel_config.all2all_backend = "flashinfer_all2allv"
             if ascend_config.torchair_graph_config.enabled or ascend_config.enable_shared_expert_dp:
                 parallel_config.worker_cls = "vllm_ascend.torchair.torchair_worker.NPUTorchairWorker"
             else:
```