From 09390eaf32aa0113056c03c935f2311b939faa89 Mon Sep 17 00:00:00 2001 From: Zetong Li <48438720+slippersss@users.noreply.github.com> Date: Fri, 26 Dec 2025 16:45:45 +0800 Subject: [PATCH] [Bugfix] Fix unsuitable moe_comm_type under ep=1 scenario (#5388) ### What this PR does / why we need it? This PR aims to fix the unsuitable `moe_comm_type` under the `ep=1` scenario. The related issue #5375 has reported that `ep=1` can cause errors in local environments, but those cases work well on CI. The point is that, because of the difference between machines, `moe_comm_type` may not be chosen correctly. ### Does this PR introduce _any_ user-facing change? N/A ### How was this patch tested? By CI - vLLM version: release/v0.13.0 - vLLM main: https://github.com/vllm-project/vllm/commit/bc0a5a0c089844b17cb93f3294348f411e523586 Signed-off-by: Zetong Li Co-authored-by: weijinqian0 <1184188277@qq.com> --- vllm_ascend/ascend_forward_context.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm_ascend/ascend_forward_context.py b/vllm_ascend/ascend_forward_context.py index c28caa09..865048be 100644 --- a/vllm_ascend/ascend_forward_context.py +++ b/vllm_ascend/ascend_forward_context.py @@ -226,7 +226,8 @@ def select_moe_comm_method(num_tokens: int, vllm_config.model_config.hf_config, 'moe_quantize', getattr(vllm_config.model_config.hf_config, 'quantize', None)) - if not vllm_config.parallel_config.enable_expert_parallel: + if not vllm_config.parallel_config.enable_expert_parallel or get_ep_group( + ).world_size == 1: moe_comm_type = MoECommType.ALLGATHER elif soc_version in {AscendDeviceType.A2}: if (num_tokens <= mc2_tokens_capacity