[Kernel] Add moe_gating_top_k operator support for Ascend NPU (#5579)

### What this PR does / why we need it?

1.replace moe_gating_top_k from torch_npu with custom op
2.enable the  renorm function of moe_gating_top_k in softmax scenerio

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
No need test

- vLLM version: v0.13.0
- vLLM main:
7157596103

---------

Signed-off-by: ZCG12345 <2097562023@qq.com>
This commit is contained in:
ZCG12345
2026-01-07 21:42:31 +08:00
committed by GitHub
parent 1165b2c863
commit 3be8e33fe9
32 changed files with 4667 additions and 13 deletions

View File

@@ -24,7 +24,7 @@ elif [[ "$SOC_VERSION" =~ ^ascend910b ]]; then
ABSOLUTE_CATLASS_PATH=$(cd "${CATLASS_PATH}" && pwd)
export CPATH=${ABSOLUTE_CATLASS_PATH}:${CPATH}
CUSTOM_OPS="grouped_matmul_swiglu_quant_weight_nz_tensor_list;lightning_indexer;sparse_flash_attention;matmul_allreduce_add_rmsnorm;moe_init_routing_custom"
CUSTOM_OPS="grouped_matmul_swiglu_quant_weight_nz_tensor_list;lightning_indexer;sparse_flash_attention;matmul_allreduce_add_rmsnorm;moe_init_routing_custom;moe_gating_top_k;"
SOC_ARG="ascend910b"
elif [[ "$SOC_VERSION" =~ ^ascend910_93 ]]; then
# ASCEND910C (A3) series
@@ -70,6 +70,7 @@ elif [[ "$SOC_VERSION" =~ ^ascend910_93 ]]; then
"dispatch_layout"
"notify_dispatch"
"moe_init_routing_custom"
"moe_gating_top_k"
)
CUSTOM_OPS=$(IFS=';'; echo "${CUSTOM_OPS_ARRAY[*]}")
SOC_ARG="ascend910_93"