From 91c35d765aa2edeb3e9c805f2fe3c330320fe696 Mon Sep 17 00:00:00 2001 From: Mengqing Cao Date: Fri, 29 Aug 2025 21:59:16 +0800 Subject: [PATCH] [Bugfix] Fix mc2 operator error in aclgraph + ep<16 scenario (#2609) ### What this PR does / why we need it? 1. quickfix mc2 operator error in aclgraph + ep<16 scenario to recover CI, will be refactorred in the future 2. disable aclgraph when testing w8a8 ### How was this patch tested? CI passed with existing test. - vLLM version: v0.10.1.1 - vLLM main: https://github.com/vllm-project/vllm/commit/95089607fa307c5facfb9706ea919292fb56e78c Signed-off-by: MengqingCao --- tests/e2e/multicard/test_qwen3_moe.py | 3 ++- vllm_ascend/ops/common_fused_moe.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/e2e/multicard/test_qwen3_moe.py b/tests/e2e/multicard/test_qwen3_moe.py index 5dfe36a..a17de55 100644 --- a/tests/e2e/multicard/test_qwen3_moe.py +++ b/tests/e2e/multicard/test_qwen3_moe.py @@ -55,6 +55,7 @@ def test_models_distributed_Qwen3_MOE_TP2_WITH_EP(): tensor_parallel_size=2, enable_expert_parallel=True, distributed_executor_backend="mp", + enforce_eager=False, ) as vllm_model: vllm_model.generate_greedy(example_prompts, max_tokens) @@ -71,7 +72,7 @@ def test_models_distributed_Qwen3_MOE_W8A8(): dtype=dtype, tensor_parallel_size=2, quantization="ascend", - enforce_eager=False, + enforce_eager=True, ) as vllm_model: vllm_model.generate_greedy(example_prompts, max_tokens) diff --git a/vllm_ascend/ops/common_fused_moe.py b/vllm_ascend/ops/common_fused_moe.py index 72ee91b..dd38c23 100644 --- a/vllm_ascend/ops/common_fused_moe.py +++ b/vllm_ascend/ops/common_fused_moe.py @@ -242,7 +242,8 @@ class AscendFusedMoE(FusedMoE): moe_comm_method_name = forward_context.moe_comm_method_name # TODO: Can we refactor this logic to model_runner? - if not self.moe_config.use_ep: + # TODO: Adjusted logic to differentiate between A2 and A3, we check ep_size here since mc2 only support ep_size >= 16 on A3 now + if self.moe_config.ep_size < 16: moe_comm_method_name = "allgathercommimpl" forward_context.moe_comm_method = getattr(self, moe_comm_method_name)