Remove unused row_idx in token_dispatcher (#3442)

### What this PR does / why we need it? The `row_idx` parameter is no longer used since PR[#2689](https://github.com/vllm-project/vllm-ascend/pull/2689), so remove it across multiple files to remove unnecessary calculations and parameter passing. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? accuracy test passed for Qwen3 235B and DeepSeek V3 671B after this PR. - vLLM version: v0.11.0rc3 - vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0 --------- Signed-off-by: CaranLic <740821011@qq.com>
2025-10-15 09:08:31 +08:00
parent 3642b64afc
commit 15b2e5c995
11 changed files with 37 additions and 88 deletions
--- a/vllm_ascend/ops/moe/experts_selector.py
+++ b/vllm_ascend/ops/moe/experts_selector.py
@@ -21,17 +21,6 @@ import torch_npu
 from vllm.forward_context import get_forward_context


-def return_row_idx(hidden_states, top_k):
-    num_tokens = hidden_states.shape[0]
-    row_idx_len = num_tokens * top_k
-    row_idx = (torch.arange(0,
-                            row_idx_len,
-                            dtype=torch.int32,
-                            device=hidden_states.device).view(
-                                top_k, -1).permute(1, 0).contiguous())
-    return row_idx
-
-
 def select_experts(hidden_states: torch.Tensor,
                   router_logits: torch.Tensor,
                   top_k: int,
@@ -71,7 +60,7 @@ def select_experts(hidden_states: torch.Tensor,
    if weight_prefetch_method:
        weight_prefetch_method.maybe_prefetch_moe_weight_preprocess(
            hidden_states, "gate_up")
-    topk_weights, topk_ids, row_idx = _select_experts_with_fusion_ops(
+    topk_weights, topk_ids = _select_experts_with_fusion_ops(
        hidden_states=hidden_states,
        router_logits=router_logits,
        top_k=top_k,
@@ -99,9 +88,7 @@ def select_experts(hidden_states: torch.Tensor,
            e_score_correction_bias=e_score_correction_bias,
            global_num_experts=global_num_experts,
        )
-    if row_idx is None:
-        row_idx = return_row_idx(hidden_states, top_k)
-    return topk_weights, topk_ids, row_idx
+    return topk_weights, topk_ids


 def _native_grouped_topk(
@@ -187,7 +174,7 @@ def _select_experts_with_fusion_ops(
        routed_scaling_factor=1.0,
        global_num_experts: int = -1):

-    topk_weights, topk_ids, row_idx = None, None, None
+    topk_weights, topk_ids = None, None
    # NOTE: now npu_moe_gating_top_k can only support 'group_count=256' pattern
    is_deepseek_v3_r1 = global_num_experts == 256
    if is_deepseek_v3_r1:
@@ -205,14 +192,13 @@ def _select_experts_with_fusion_ops(
            # y2_flag=False, # old api; should the third output be output
            routed_scaling_factor=1,
            eps=float(1e-20))
-        row_idx = return_row_idx(hidden_states, top_k)
    if not use_grouped_topk and custom_routing_function is None and scoring_func == "softmax":
-        topk_weights, topk_ids, row_idx = torch_npu.npu_moe_gating_top_k_softmax(
+        topk_weights, topk_ids, _ = torch_npu.npu_moe_gating_top_k_softmax(
            x=router_logits, finished=None, k=top_k)
        topk_ids = topk_ids.to(torch.int32)
        topk_weights = _renormalize_topk_weights(topk_weights, renormalize)

-    return topk_weights, topk_ids, row_idx
+    return topk_weights, topk_ids


 def _native_select_experts(