From 4aa39d72c42250c022f9e5db828154d875cb1757 Mon Sep 17 00:00:00 2001 From: chenqianfzh <51831990+chenqianfzh@users.noreply.github.com> Date: Thu, 11 Sep 2025 23:47:48 -0700 Subject: [PATCH] fix the break in FlashInferFusedMoE (#10356) Co-authored-by: Ho-Ren (Jack) Chuang --- python/sglang/srt/layers/moe/fused_moe_triton/layer.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/layer.py b/python/sglang/srt/layers/moe/fused_moe_triton/layer.py index 4ceba1d49..f46521c3a 100644 --- a/python/sglang/srt/layers/moe/fused_moe_triton/layer.py +++ b/python/sglang/srt/layers/moe/fused_moe_triton/layer.py @@ -26,6 +26,7 @@ from sglang.srt.layers.moe import ( from sglang.srt.layers.moe.token_dispatcher.standard import ( CombineInput, StandardDispatcher, + StandardDispatchOutput, ) from sglang.srt.layers.moe.topk import TopKOutput, TopKOutputChecker from sglang.srt.layers.quantization.base_config import ( @@ -981,8 +982,9 @@ class FlashInferFusedMoE(FusedMoE): # Matrix multiply. final_hidden_states = self.quant_method.apply_with_router_logits( layer=self, - x=hidden_states, - topk_output=topk_output, + dispatch_output=StandardDispatchOutput( + hidden_states=hidden_states, topk_output=topk_output + ), ) if self.reduce_results and (self.moe_tp_size > 1 or self.moe_ep_size > 1):