From 3b87a9e8ae87ee998b98954b0813348ce6f34a78 Mon Sep 17 00:00:00 2001 From: Yuan Luo Date: Mon, 4 Aug 2025 11:05:02 +0800 Subject: [PATCH] Fix topk_output unpacking order in w4afp8 after the TopKOutput refactor (#8745) Co-authored-by: luoyuan.luo --- python/sglang/srt/layers/quantization/w4afp8.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/sglang/srt/layers/quantization/w4afp8.py b/python/sglang/srt/layers/quantization/w4afp8.py index 8619c042b..ba11a4b6e 100644 --- a/python/sglang/srt/layers/quantization/w4afp8.py +++ b/python/sglang/srt/layers/quantization/w4afp8.py @@ -282,7 +282,7 @@ class W4AFp8MoEMethod(FusedMoEMethodBase): # TODO(ch-wan): move it out of this class from sglang.srt.layers.moe.cutlass_w4a8_moe import cutlass_w4a8_moe - topk_ids, topk_weights, _ = topk_output + topk_weights, topk_ids, _ = topk_output local_topk_ids = topk_ids if layer.expert_map is not None: "Translate info from expert_map to topk_ids"