[Qwen-moe] Remove the minor operation arange (#2373)

### What this PR does / why we need it?

Integrate the arange operator to reduce time spent and improve performance: `select_experts` now also returns `row_idx`, and the fused-experts functions accept `row_idx` as an argument.

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

- vLLM version: v0.10.1.1
- vLLM main: 56dcf4e7e9

---------

Signed-off-by: s30076806 <songjiayang2@h-partners.com>
2025-08-27 09:13:31 +08:00
parent 358ba68994
commit 6a4ec186e7
9 changed files with 80 additions and 79 deletions
--- a/tests/e2e/singlecard/ops/test_fused_moe.py
+++ b/tests/e2e/singlecard/ops/test_fused_moe.py
@@ -92,8 +92,15 @@ def test_fused_experts(
    score = torch.softmax(score, dim=-1, dtype=dtype)
    topk_weights, topk_ids = torch.topk(score, topk)
    topk_ids = topk_ids.to(torch.int32)
+    row_idx = (torch.arange(
+        0,
+        m * topk,
+        device=device,
+        dtype=torch.int32,
+    ).view(topk, -1).permute(1, 0).contiguous())

-    output = fused_experts(a, w1, w2, topk_weights, topk_ids, topk, e_map)
+    output = fused_experts(a, w1, w2, topk_weights, topk_ids, row_idx, topk,
+                           e_map)
    torch_output = torch_moe(a, w1, w2, topk_weights, topk_ids, topk, e_map)
    # TODO: The native params are: atol=2e-2, rtol=0, maybe related to the nan problem
    torch.testing.assert_close(output, torch_output, atol=4e-2, rtol=1)
@@ -148,7 +155,7 @@ def test_select_experts(
        mock_native_grouped_topk.side_effect = lambda x, num_groups, k: torch.randn_like(
            x)

-        topk_weights, topk_ids = select_experts(
+        topk_weights, topk_ids, row_idx = select_experts(
            hidden_states=hidden_states,
            router_logits=router_logits,
            top_k=topk,
@@ -169,6 +176,7 @@ def test_select_experts(
    assert topk_weights.shape == (m, topk)
    assert topk_ids.shape == (m, topk)
    assert topk_ids.dtype == torch.int32
+    assert row_idx.shape == (m, topk)


@pytest.mark.parametrize("device", DEVICE)
--- a/tests/ut/ops/test_fused_ops.py
+++ b/tests/ut/ops/test_fused_ops.py
@@ -405,7 +405,7 @@ class TestExpertsSelector:

        x = torch.randn(8, 2)
        router_logits = torch.randn(8, 2)
-        topk_weights, topk_ids = select_experts(
+        topk_weights, topk_ids, _ = select_experts(
            hidden_states=x,
            router_logits=router_logits,
            top_k=2,
--- a/tests/ut/quantization/test_w8a8.py
+++ b/tests/ut/quantization/test_w8a8.py
@@ -719,12 +719,12 @@ class TestSelectExperts(TestBase):
    def test_softmax_scoring(self):
        """Test softmax scoring function"""

-        weights, ids = select_experts(hidden_states=self.hidden_states,
-                                      router_logits=self.router_logits,
-                                      top_k=self.top_k,
-                                      use_grouped_topk=False,
-                                      renormalize=False,
-                                      scoring_func="softmax")
+        weights, ids, _ = select_experts(hidden_states=self.hidden_states,
+                                         router_logits=self.router_logits,
+                                         top_k=self.top_k,
+                                         use_grouped_topk=False,
+                                         renormalize=False,
+                                         scoring_func="softmax")

        self.assertEqual(weights.shape, (self.num_tokens, self.top_k))
        self.assertEqual(ids.shape, (self.num_tokens, self.top_k))
@@ -732,12 +732,12 @@ class TestSelectExperts(TestBase):
    def test_sigmoid_scoring(self):
        """Test sigmoid scoring function"""

-        weights, ids = select_experts(hidden_states=self.hidden_states,
-                                      router_logits=self.router_logits,
-                                      top_k=self.top_k,
-                                      use_grouped_topk=False,
-                                      renormalize=False,
-                                      scoring_func="sigmoid")
+        weights, ids, _ = select_experts(hidden_states=self.hidden_states,
+                                         router_logits=self.router_logits,
+                                         top_k=self.top_k,
+                                         use_grouped_topk=False,
+                                         renormalize=False,
+                                         scoring_func="sigmoid")

        self.assertEqual(weights.shape, (self.num_tokens, self.top_k))
        self.assertEqual(ids.shape, (self.num_tokens, self.top_k))
@@ -760,13 +760,13 @@ class TestSelectExperts(TestBase):
                                              self.top_k,
                                              dtype=torch.long))

-        weights, ids = select_experts(hidden_states=self.hidden_states,
-                                      router_logits=self.router_logits,
-                                      top_k=self.top_k,
-                                      use_grouped_topk=True,
-                                      renormalize=False,
-                                      topk_group=4,
-                                      num_expert_group=2)
+        weights, ids, _ = select_experts(hidden_states=self.hidden_states,
+                                         router_logits=self.router_logits,
+                                         top_k=self.top_k,
+                                         use_grouped_topk=True,
+                                         renormalize=False,
+                                         topk_group=4,
+                                         num_expert_group=2)

        mock_topk.assert_called()
        self.assertEqual(weights.shape, (self.num_tokens, self.top_k))
@@ -780,7 +780,7 @@ class TestSelectExperts(TestBase):
                                                    self.num_experts)

        e_score_correction_bias = torch.randn(self.num_experts)
-        weights, ids = select_experts(
+        weights, ids, _ = select_experts(
            hidden_states=self.hidden_states,
            router_logits=self.router_logits,
            top_k=self.top_k,
@@ -803,7 +803,7 @@ class TestSelectExperts(TestBase):
                                                        self.top_k,
                                                        dtype=torch.int32))

-        weights, ids = select_experts(
+        weights, ids, _ = select_experts(
            hidden_states=self.hidden_states,
            router_logits=self.router_logits,
            top_k=self.top_k,
@@ -824,7 +824,7 @@ class TestSelectExperts(TestBase):
                                              self.top_k,
                                              dtype=torch.long))

-        weights, _ = select_experts(
+        weights, ids, _ = select_experts(
            hidden_states=self.hidden_states,
            router_logits=self.router_logits,
            top_k=self.top_k,
@@ -844,7 +844,7 @@ class TestSelectExperts(TestBase):
                                              self.top_k,
                                              dtype=torch.long))

-        weights, ids = select_experts(
+        weights, ids, _ = select_experts(
            hidden_states=self.hidden_states,
            router_logits=self.router_logits,
            top_k=self.top_k,
--- a/tests/ut/quantization/test_w8a8_dynamic.py
+++ b/tests/ut/quantization/test_w8a8_dynamic.py
@@ -55,6 +55,12 @@ class TestAscendW8A8FusedMoEMethod(TestBase):
            torch.randn(self.num_tokens),
        )
        mock_moe_finalize_routing.return_value = self.placeholder
+        row_idx_len = self.num_tokens * 8
+        row_idx = (torch.arange(
+            0,
+            row_idx_len,
+            dtype=torch.int32,
+        ).view(8, -1).permute(1, 0).contiguous())

        result = fused_experts_with_all2all(
            hidden_states=self.placeholder,
@@ -64,6 +70,7 @@ class TestAscendW8A8FusedMoEMethod(TestBase):
            w2_scale=self.placeholder,
            topk_weights=self.placeholder,
            topk_ids=self.placeholder,
+            row_idx=row_idx,
            top_k=8,
            expert_map=expert_map,
            ep_group=ep_group,