diff --git a/vllm_ascend/ascend_forward_context.py b/vllm_ascend/ascend_forward_context.py index c28caa09..865048be 100644 --- a/vllm_ascend/ascend_forward_context.py +++ b/vllm_ascend/ascend_forward_context.py @@ -226,7 +226,8 @@ def select_moe_comm_method(num_tokens: int, vllm_config.model_config.hf_config, 'moe_quantize', getattr(vllm_config.model_config.hf_config, 'quantize', None)) - if not vllm_config.parallel_config.enable_expert_parallel: + if not vllm_config.parallel_config.enable_expert_parallel or get_ep_group( + ).world_size == 1: moe_comm_type = MoECommType.ALLGATHER elif soc_version in {AscendDeviceType.A2}: if (num_tokens <= mc2_tokens_capacity