diff --git a/python/sglang/srt/layers/quantization/w4afp8.py b/python/sglang/srt/layers/quantization/w4afp8.py index 8619c042b..ba11a4b6e 100644 --- a/python/sglang/srt/layers/quantization/w4afp8.py +++ b/python/sglang/srt/layers/quantization/w4afp8.py @@ -282,7 +282,7 @@ class W4AFp8MoEMethod(FusedMoEMethodBase): # TODO(ch-wan): move it out of this class from sglang.srt.layers.moe.cutlass_w4a8_moe import cutlass_w4a8_moe - topk_ids, topk_weights, _ = topk_output + topk_weights, topk_ids, _ = topk_output local_topk_ids = topk_ids if layer.expert_map is not None: "Translate info from expert_map to topk_ids"