Revert "moe_gating_top_k" (#5512)
Reverts vllm-project/vllm-ascend#5271
It breaks the e2e test.
- vLLM version: v0.13.0
- vLLM main:
45c1ca1ca1
This commit is contained in:
@@ -17,6 +17,7 @@
|
||||
from typing import Callable, Optional
|
||||
|
||||
import torch
|
||||
import torch_npu
|
||||
|
||||
from vllm_ascend.utils import get_weight_prefetch_method
|
||||
|
||||
@@ -213,19 +214,21 @@ def _select_experts_with_fusion_ops(
|
||||
e_score_correction_bias.dtype != router_logits.dtype:
|
||||
e_score_correction_bias = e_score_correction_bias.to(
|
||||
router_logits.dtype)
|
||||
_, topk_ids, topk_weights = torch.ops._C_ascend.moe_gating_top_k(
|
||||
topk_weights, topk_ids, _ = torch_npu.npu_moe_gating_top_k(
|
||||
router_logits,
|
||||
k=top_k,
|
||||
kGroup=topk_group,
|
||||
groupCount=num_expert_group,
|
||||
groupSelectMode=1, # 0: the maximum in the group; 1: topk2.sum(fix)
|
||||
renorm=1, # 0: softmax->topk(fix); 1: topk->softmax
|
||||
normType=norm_type, # 0: softmax; 1: sigmoid
|
||||
outFlag=False, # todo new api; should the third output be output
|
||||
routedScalingFactor=1,
|
||||
eps=float(1e-20),
|
||||
biasOptional=e_score_correction_bias,
|
||||
)
|
||||
bias=e_score_correction_bias,
|
||||
k_group=topk_group,
|
||||
group_count=num_expert_group,
|
||||
group_select_mode=1, # 0: the maximum in the group; 1: topk2.sum(fix)
|
||||
renorm=0, # 0: softmax->topk(fix); 1: topk->softmax
|
||||
norm_type=norm_type, # 0: softmax; 1: sigmoid
|
||||
# out_flag=False, # todo new api; should the third output be output
|
||||
# y2_flag=False, # old api; should the third output be output
|
||||
routed_scaling_factor=1,
|
||||
eps=float(1e-20))
|
||||
if scoring_func == "softmax":
|
||||
topk_weights = _renormalize_topk_weights(topk_weights, renormalize)
|
||||
|
||||
return topk_weights, topk_ids
|
||||
|
||||
|
||||
Reference in New Issue
Block a user