From 7f1e93f185898093ecb0c723d39c379cfcfe92f1 Mon Sep 17 00:00:00 2001 From: weichen Date: Wed, 17 Dec 2025 17:39:57 +0800 Subject: [PATCH] [Bugfix][MoE] Remove All2All in w4a8_dynamic (#4977) ### What this PR does / why we need it? GatherEP has been fixed in https://github.com/vllm-project/vllm-ascend/pull/3279, so this PR removes the all2all fallback in the w4a8_dynamic scenario. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? e2e & ut - vLLM version: v0.12.0 - vLLM main: https://github.com/vllm-project/vllm/commit/ad32e3e19ccf0526cb6744a5fed09a138a5fb2f9 Signed-off-by: weichen --- vllm_ascend/ascend_forward_context.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/vllm_ascend/ascend_forward_context.py b/vllm_ascend/ascend_forward_context.py index 8106c935..6f402ca8 100644 --- a/vllm_ascend/ascend_forward_context.py +++ b/vllm_ascend/ascend_forward_context.py @@ -249,11 +249,7 @@ def select_moe_comm_method(num_tokens: int, vllm_config.parallel_config.pipeline_parallel_size >= 16): moe_comm_type = MoECommType.MC2 else: - # Currently, w4a8_dynamic does not support allgatherep - if quant_type == "w4a8_dynamic": - moe_comm_type = MoECommType.ALLTOALL - else: - moe_comm_type = MoECommType.ALLGATHER + moe_comm_type = MoECommType.ALLGATHER elif soc_version in {AscendDeviceType.A3}: ascend_config = get_ascend_config()