【fix】ops gatingtopk fix nightly ci error (#4340)

### What this PR does / why we need it? PR https://github.com/vllm-project/vllm-ascend/pull/2958 added support for generalizing the gatingtopk operator, but it caused a nightly CI error. This PR adds a check for the gatingtopk op (`check_npu_moe_gating_top_k`) and fixes the nightly CI. - vLLM version: v0.12.0 Signed-off-by: 1092626063 <1092626063@qq.com>
2025-12-04 20:09:21 +08:00
parent da84eb2f40
commit b3e1377a92
3 changed files with 53 additions and 22 deletions
--- a/tests/e2e/nightly/ops/test_fused_moe.py
+++ b/tests/e2e/nightly/ops/test_fused_moe.py
@@ -28,7 +28,8 @@ import torch
 import torch_npu
 from vllm.model_executor.layers.activation import SiluAndMul

-from vllm_ascend.ops.fused_moe.experts_selector import select_experts
+from vllm_ascend.ops.fused_moe.experts_selector import (
+    check_npu_moe_gating_top_k, select_experts)
 from vllm_ascend.ops.fused_moe.moe_mlp import unified_apply_mlp
 from vllm_ascend.ops.fused_moe.token_dispatcher import \
    TokenDispatcherWithAllGather
@@ -303,7 +304,10 @@ def test_select_experts(
            e_score_correction_bias=e_score_correction_bias,
        )

-        if use_grouped_topk:
+        call_moe_gatingtopk = check_npu_moe_gating_top_k(
+            hidden_states, topk, topk_group, num_expert_group, scoring_func,
+            custom_routing_function)
+        if not call_moe_gatingtopk and use_grouped_topk:
            mock_native_grouped_topk.assert_called_once()
        else:
            mock_native_grouped_topk.assert_not_called()