Fix bug introduced by the TopKOutput refactoring in w4afp8 (#8745)
Co-authored-by: luoyuan.luo <luoyuan.luo@antgroup.com>
This commit is contained in:
@@ -282,7 +282,7 @@ class W4AFp8MoEMethod(FusedMoEMethodBase):
|
||||
# TODO(ch-wan): move it out of this class
|
||||
from sglang.srt.layers.moe.cutlass_w4a8_moe import cutlass_w4a8_moe
|
||||
|
||||
topk_ids, topk_weights, _ = topk_output
|
||||
topk_weights, topk_ids, _ = topk_output
|
||||
local_topk_ids = topk_ids
|
||||
if layer.expert_map is not None:
|
||||
"Translate info from expert_map to topk_ids"
|
||||
|
||||
Reference in New Issue
Block a user