Remove the vllm dependency from the moe_align function (#4164)
Co-authored-by: Hongbosherlock <hongbosherlock@gmail.com>
@@ -138,18 +138,20 @@ def moe_align_block_size_triton(
 @pytest.mark.parametrize(
-    "block_size,num_tokens,topk",
+    "block_size,num_tokens,topk,num_experts",
     list(
         itertools.product(
             [32, 64, 128, 256],  # block_size
             [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096],  # num_tokens
             [1, 2, 4, 8, 16, 32, 64],  # topk
+            [64, 160, 256],  # num_experts
         )
     ),
 )
-def test_moe_align_block_size_compare_implementations(block_size, num_tokens, topk):
-    # For DeepSeek V3, we have 256 experts
-    num_experts = 256
-
+def test_moe_align_block_size_compare_implementations(
+    block_size, num_tokens, topk, num_experts
+):
     topk_ids = torch.stack(
         [
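For context, a minimal sketch (not code from this commit) of what the widened parametrization does: itertools.product expands the four value lists into every combination, so the test now covers 4 x 13 x 7 x 3 = 1092 cases with num_experts drawn from [64, 160, 256], instead of hardcoding num_experts = 256. The helper make_topk_ids below is a hypothetical stand-in for how a (num_tokens, topk) tensor of expert indices might be built; only the parameter lists come from the diff.

import torch

def make_topk_ids(num_tokens: int, topk: int, num_experts: int) -> torch.Tensor:
    # Hypothetical helper, not from the commit: each token picks `topk`
    # distinct expert indices from [0, num_experts). Requires topk <= num_experts,
    # which holds for every combination in the parametrization above.
    return torch.stack(
        [torch.randperm(num_experts)[:topk] for _ in range(num_tokens)]
    ).to(torch.int32)

topk_ids = make_topk_ids(num_tokens=16, topk=4, num_experts=160)
print(topk_ids.shape)  # torch.Size([16, 4])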