diff --git a/python/sglang/srt/layers/triton_fused_moe/fused_moe.py b/python/sglang/srt/layers/triton_fused_moe/fused_moe.py index 86c189257..8a289a5c6 100644 --- a/python/sglang/srt/layers/triton_fused_moe/fused_moe.py +++ b/python/sglang/srt/layers/triton_fused_moe/fused_moe.py @@ -766,9 +766,10 @@ def fused_experts_impl( use_int8_w8a16=use_int8_w8a16, ) - ops.moe_sum( + torch.sum( intermediate_cache3.view(*intermediate_cache3.shape), - out_hidden_states[begin_chunk_idx:end_chunk_idx], + dim=1, + out=out_hidden_states[begin_chunk_idx:end_chunk_idx], ) return out_hidden_states