Revert "GMM custom operator optimization in small batch scenarios (vllm-project#7100)" (#7557)

### What this PR does / why we need it?

This reverts commit 42bcad7e9b. The reverted commit causes an accuracy regression on Qwen3-Next: on 150 GSM8K items, the score drops from 98 to 91.

- vLLM version: v0.18.0
- vLLM main: 6a9cceb219

Signed-off-by: Your Name <you@example.com>
Co-authored-by: Your Name <you@example.com>
2026-03-24 14:24:44 +08:00
parent 83bd77c983
commit 475b4b0cea
3 changed files with 30 additions and 71 deletions
--- a/csrc/torch_binding.cpp
+++ b/csrc/torch_binding.cpp
@@ -691,7 +691,7 @@ std::vector<at::Tensor> moe_grouped_matmul(
    y.emplace_back(y_0);
    at::TensorList result = at::TensorList(y);

-    EXEC_NPU_CMD(aclnnMoeGroupedMatmul,
+    EXEC_NPU_CMD(aclnnMoeGroupedMatmulWeightNz,
                x_list, weight_list, group_list, transpose_weight, result);

    return y;