Fix TopKOutput unpacking order in w4afp8 after the TopKOutput refactoring (#8745)
Co-authored-by: luoyuan.luo <luoyuan.luo@antgroup.com>
This commit is contained in:
@@ -282,7 +282,7 @@ class W4AFp8MoEMethod(FusedMoEMethodBase):
|
|||||||
# TODO(ch-wan): move it out of this class
|
# TODO(ch-wan): move it out of this class
|
||||||
from sglang.srt.layers.moe.cutlass_w4a8_moe import cutlass_w4a8_moe
|
from sglang.srt.layers.moe.cutlass_w4a8_moe import cutlass_w4a8_moe
|
||||||
|
|
||||||
topk_ids, topk_weights, _ = topk_output
|
topk_weights, topk_ids, _ = topk_output
|
||||||
local_topk_ids = topk_ids
|
local_topk_ids = topk_ids
|
||||||
if layer.expert_map is not None:
|
if layer.expert_map is not None:
|
||||||
"Translate info from expert_map to topk_ids"
|
"Translate info from expert_map to topk_ids"
|
||||||
|
|||||||
Reference in New Issue
Block a user