refactor select_experts of moe module (#2150)

### What this PR does / why we need it?
This PR refactors `select_experts` of the MoE module: the quantized and
non-quantized implementations are merged into a new class and exposed
vLLM-style as `ExpertsSelector.select_experts`.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Tested with qwen3-moe and all unit tests.
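
A minimal sketch of the refactor described above: a single entry point that dispatches between the quantized and non-quantized expert-selection paths. The class shape and the `is_quantized` parameter are illustrative assumptions, not the actual `vllm_ascend` API.

```python
import math


class ExpertsSelector:
    """Hypothetical unified selector; names are illustrative only."""

    @staticmethod
    def select_experts(router_logits, top_k, is_quantized=False):
        """Return (topk_weights, topk_ids) per token from raw router logits."""
        weights, ids = [], []
        for logits in router_logits:
            # Softmax over the expert dimension.
            m = max(logits)
            exps = [math.exp(v - m) for v in logits]
            total = sum(exps)
            probs = [e / total for e in exps]
            # Pick the top-k experts by routing probability.
            ranked = sorted(range(len(probs)),
                            key=lambda i: probs[i], reverse=True)[:top_k]
            w = [probs[i] for i in ranked]
            if is_quantized:
                # A real quantized path would add its own scaling and dtype
                # handling; this sketch just renormalizes the selected weights.
                s = sum(w)
                w = [v / s for v in w]
            weights.append(w)
            ids.append(ranked)
        return weights, ids


# Usage: 2 tokens routed over 4 experts, top-2 selection.
w, ids = ExpertsSelector.select_experts([[0.1, 2.0, 0.3, 1.5],
                                         [1.0, 0.0, 3.0, 0.5]], top_k=2)
# ids -> [[1, 3], [2, 0]]
```

The point of merging both paths behind one static method is that callers (and tests) depend on a single import instead of per-quantization variants scattered through `fused_moe`.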

- vLLM version: v0.10.0
- vLLM main: e18859298d

Signed-off-by: yangcheng <yangcheng104@huawei.com>
Co-authored-by: yangcheng (AJ) <y00806874@china.huawei.com>
This commit is contained in:
shiyuan680
2025-08-14 11:50:53 +08:00
committed by GitHub
parent 103654ccd6
commit e14f2ef669
10 changed files with 359 additions and 370 deletions


@@ -26,7 +26,8 @@ import pytest
 import torch
 from vllm.model_executor.layers.activation import SiluAndMul
-from vllm_ascend.ops.fused_moe import fused_experts, select_experts
+from vllm_ascend.ops.fused_moe import fused_experts
+from vllm_ascend.ops.layers.experts_selector import select_experts
 NUM_EXPERTS = [8, 64]
 EP_SIZE = [1, 4]
@@ -142,7 +143,7 @@ def test_select_experts(
             dtype=torch.int32)
         custom_routing_function.return_value = (mock_weights, mock_ids)
-    with patch("vllm_ascend.ops.fused_moe.native_grouped_topk"
+    with patch("vllm_ascend.ops.layers.experts_selector._native_grouped_topk"
               ) as mock_native_grouped_topk:
         mock_native_grouped_topk.side_effect = lambda x, num_groups, k: torch.randn_like(
             x)
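
The test change above only retargets the patch at the helper's new module path. The pattern itself can be shown in a self-contained way: patch a module-level function and drive it with `side_effect` so the test controls its output. The toy module below stands in for `vllm_ascend.ops.layers.experts_selector`; in the real test the patch target string is the one shown in the diff.

```python
import sys
import types
from unittest.mock import patch

# Build a stand-in module exposing a _native_grouped_topk helper, mirroring
# the module-level function that the real test patches.
toy = types.ModuleType("toy_experts_selector")

def _native_grouped_topk(x, num_groups, k):
    raise RuntimeError("real kernel should not run under the mock")

toy._native_grouped_topk = _native_grouped_topk
sys.modules["toy_experts_selector"] = toy

with patch("toy_experts_selector._native_grouped_topk") as mock_topk:
    # side_effect lets the test return deterministically shaped dummy output
    # instead of invoking the real (hardware-dependent) implementation.
    mock_topk.side_effect = lambda x, num_groups, k: [0.0] * len(x)
    out = sys.modules["toy_experts_selector"]._native_grouped_topk([1, 2, 3], 4, 2)

print(out)                   # [0.0, 0.0, 0.0]
print(mock_topk.call_count)  # 1
```

Patching at the module where the name is looked up (rather than where it was defined) is what makes this work after the refactor: since `_native_grouped_topk` moved, the patch target string had to move with it.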