Revert "[MOE] enable efficient moe_alignment multi-blocks execution (3x~6x)" (#3982)

This commit is contained in:
Chayenne
2025-02-28 23:57:17 -08:00
committed by GitHub
parent 6b859e7ddd
commit 18bb216c28
5 changed files with 94 additions and 381 deletions

View File

@@ -171,12 +171,12 @@ def test_moe_align_block_size_compare_implementations(block_size, num_tokens, to
num_tokens_post_pad_cuda = torch.empty(
(1), dtype=torch.int32, device=topk_ids.device
)
-token_cnts_buffer = torch.zeros(
+token_cnts_buffer = torch.empty(
(num_experts + 1) * num_experts,
dtype=torch.int32,
device=topk_ids.device,
)
-cumsum_buffer = torch.zeros(
+cumsum_buffer = torch.empty(
num_experts + 1, dtype=torch.int32, device=topk_ids.device
)