Revert "moe_gating_top_k" (#5512)

Reverts vllm-project/vllm-ascend#5271

It breaks the e2e tests.

- vLLM version: v0.13.0
- vLLM main:
45c1ca1ca1
This commit is contained in:
zzzzwwjj
2025-12-30 15:05:47 +08:00
committed by GitHub
parent 4ff4d1cef9
commit 71f729a661
34 changed files with 22 additions and 4791 deletions

View File

@@ -17,6 +17,7 @@
from typing import Callable, Optional
import torch
import torch_npu
from vllm_ascend.utils import get_weight_prefetch_method
@@ -213,19 +214,21 @@ def _select_experts_with_fusion_ops(
e_score_correction_bias.dtype != router_logits.dtype:
e_score_correction_bias = e_score_correction_bias.to(
router_logits.dtype)
_, topk_ids, topk_weights = torch.ops._C_ascend.moe_gating_top_k(
topk_weights, topk_ids, _ = torch_npu.npu_moe_gating_top_k(
router_logits,
k=top_k,
kGroup=topk_group,
groupCount=num_expert_group,
groupSelectMode=1, # 0: the maximum in the group; 1: topk2.sum(fix)
renorm=1, # 0: softmax->topk(fix); 1: topk->softmax
normType=norm_type, # 0: softmax; 1: sigmoid
outFlag=False, # todo new api; should the third output be output
routedScalingFactor=1,
eps=float(1e-20),
biasOptional=e_score_correction_bias,
)
bias=e_score_correction_bias,
k_group=topk_group,
group_count=num_expert_group,
group_select_mode=1, # 0: the maximum in the group; 1: topk2.sum(fix)
renorm=0, # 0: softmax->topk(fix); 1: topk->softmax
norm_type=norm_type, # 0: softmax; 1: sigmoid
# out_flag=False, # todo new api; should the third output be output
# y2_flag=False, # old api; should the third output be output
routed_scaling_factor=1,
eps=float(1e-20))
if scoring_func == "softmax":
topk_weights = _renormalize_topk_weights(topk_weights, renormalize)
return topk_weights, topk_ids