From 60bcbf2a35e290a351386f4dbe85f5b0534acb14 Mon Sep 17 00:00:00 2001
From: Xiaoyu Zhang <35585791+BBuf@users.noreply.github.com>
Date: Sat, 12 Apr 2025 03:13:55 +0800
Subject: [PATCH] remove moe_align_block_size torch.zeros in small
 batch/expert mode (#5298)

---
 python/sglang/srt/layers/moe/fused_moe_triton/fused_moe.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/fused_moe.py b/python/sglang/srt/layers/moe/fused_moe_triton/fused_moe.py
index c90be69e6..5d3e319e5 100644
--- a/python/sglang/srt/layers/moe/fused_moe_triton/fused_moe.py
+++ b/python/sglang/srt/layers/moe/fused_moe_triton/fused_moe.py
@@ -707,7 +707,7 @@ def moe_align_block_size(
         dtype=torch.int32,
         device=topk_ids.device,
     )
-    cumsum_buffer = torch.zeros(
+    cumsum_buffer = torch.empty(
         num_experts + 1, dtype=torch.int32, device=topk_ids.device
     )
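
Note on the change: torch.zeros allocates and then launches a separate fill
kernel to zero the memory, whereas torch.empty only allocates. The swap is
safe provided the alignment kernel writes every element of cumsum_buffer
before reading it, as the patch title implies; in the small batch/expert
regime that redundant zero-fill is a fixed per-call overhead worth removing.
Below is a minimal sketch of the pattern in plain PyTorch (on CPU for
portability); fill_cumsum and the shapes here are illustrative stand-ins,
not sglang's kernel API:

    import torch

    def fill_cumsum(counts: torch.Tensor, out: torch.Tensor) -> torch.Tensor:
        # Stand-in for the alignment kernel: writes *every* element of
        # `out`, so the uninitialized contents left behind by torch.empty
        # are never observed.
        out[0] = 0
        out[1:] = torch.cumsum(counts, dim=0)
        return out

    num_experts = 8
    counts = torch.randint(0, 16, (num_experts,), dtype=torch.int32)

    # Before: torch.zeros pays for an extra device-side fill on each call.
    # After: torch.empty is allocation only; the consumer initializes it.
    cumsum_buffer = torch.empty(num_experts + 1, dtype=torch.int32)
    fill_cumsum(counts, cumsum_buffer)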