Remove the vllm dependency from the moe_align function (#4164)

Co-authored-by: Hongbosherlock <hongbosherlock@gmail.com>
This commit is contained in:
lukec
2025-03-08 14:42:16 +08:00
committed by GitHub
parent d4017a6b63
commit b93ef5e56d
2 changed files with 15 additions and 11 deletions

View File

@@ -138,18 +138,20 @@ def moe_align_block_size_triton(
@pytest.mark.parametrize(
-"block_size,num_tokens,topk",
+"block_size,num_tokens,topk,num_experts",
list(
itertools.product(
[32, 64, 128, 256], # block_size
[1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096], # num_tokens
[1, 2, 4, 8, 16, 32, 64], # topk
+[64, 160, 256], # num_experts
)
),
)
-def test_moe_align_block_size_compare_implementations(block_size, num_tokens, topk):
+def test_moe_align_block_size_compare_implementations(
+block_size, num_tokens, topk, num_experts
+):
# For DeepSeek V3, we have 256 experts
-num_experts = 256
topk_ids = torch.stack(
[