remove moe_align_block_size torch.zeros in small batch/expert mode (#5298)
@@ -707,7 +707,7 @@ def moe_align_block_size(
         dtype=torch.int32,
         device=topk_ids.device,
     )
-    cumsum_buffer = torch.zeros(
+    cumsum_buffer = torch.empty(
         num_experts + 1, dtype=torch.int32, device=topk_ids.device
     )
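The swap matters because torch.zeros both allocates and launches a device fill kernel, while torch.empty only allocates; for the tiny (num_experts + 1)-element cumsum buffer in the small batch/expert path, the fill launch is pure overhead. Below is a minimal sketch of the pattern, assuming (as the change implies) that every element of the buffer is written before it is read. The counts/cumsum lines are illustrative stand-ins for the real alignment kernel, not code from this commit.

import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
num_experts = 8  # the "small expert" regime from the commit title

# torch.zeros = allocate + launch a fill kernel on the device.
zeroed = torch.zeros(num_experts + 1, dtype=torch.int32, device=device)

# torch.empty = allocate only; contents are uninitialized, which is safe
# precisely when the consumer writes every element before reading any.
cumsum_buffer = torch.empty(num_experts + 1, dtype=torch.int32, device=device)

# Illustrative write-before-read use of a cumsum buffer (hypothetical;
# the real kernel fills it as part of moe_align_block_size):
counts = torch.randint(0, 16, (num_experts,), dtype=torch.int32, device=device)
cumsum_buffer[0] = 0                             # element 0 written explicitly
cumsum_buffer[1:] = torch.cumsum(counts, dim=0)  # elements 1..N written

# Every element is now initialized before any read, so the zero-fill
# that this commit removes was redundant in this path.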