From fa271613809bc5d901c0e864c4f9b9d3d3a101bd Mon Sep 17 00:00:00 2001
From: Yineng Zhang
Date: Sun, 24 Nov 2024 22:37:04 +0800
Subject: [PATCH] fix: use torch.sum for compatibility (#2161)

---
 python/sglang/srt/layers/triton_fused_moe/fused_moe.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/python/sglang/srt/layers/triton_fused_moe/fused_moe.py b/python/sglang/srt/layers/triton_fused_moe/fused_moe.py
index 86c189257..8a289a5c6 100644
--- a/python/sglang/srt/layers/triton_fused_moe/fused_moe.py
+++ b/python/sglang/srt/layers/triton_fused_moe/fused_moe.py
@@ -766,9 +766,10 @@ def fused_experts_impl(
             use_int8_w8a16=use_int8_w8a16,
         )
 
-        ops.moe_sum(
+        torch.sum(
             intermediate_cache3.view(*intermediate_cache3.shape),
-            out_hidden_states[begin_chunk_idx:end_chunk_idx],
+            dim=1,
+            out=out_hidden_states[begin_chunk_idx:end_chunk_idx],
         )
 
     return out_hidden_states
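
What the patched call computes, as a standalone sketch. The shapes below are
illustrative assumptions, not values taken from the patch: in
fused_experts_impl, intermediate_cache3 holds per-(token, expert) outputs of
shape [num_tokens, topk, hidden_size], and the reduction over the topk
dimension is written in place into a slice of the preallocated output buffer.
torch.sum with dim=1 and out= reproduces what the custom ops.moe_sum kernel
did, using only stock PyTorch:

    import torch

    # Illustrative sizes; the real ones come from the chunked MoE buffers.
    num_tokens, topk, hidden_size = 4, 2, 8

    # Per-(token, expert) outputs awaiting reduction over the expert dim.
    intermediate_cache3 = torch.randn(num_tokens, topk, hidden_size)

    # Preallocated output buffer, standing in for out_hidden_states.
    out_hidden_states = torch.empty(num_tokens, hidden_size)

    # The patched call: sum over dim=1 (topk), writing directly into the
    # output buffer, matching what ops.moe_sum produced.
    torch.sum(
        intermediate_cache3.view(*intermediate_cache3.shape),
        dim=1,
        out=out_hidden_states,
    )

    # Sanity check against the plain functional form of the reduction.
    assert torch.allclose(out_hidden_states, intermediate_cache3.sum(dim=1))

The out= form keeps the original in-place write pattern and avoids allocating
a temporary, while dropping the dependency on the custom ops extension, which
is the compatibility gain the subject line refers to.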