Revert "Fuse routed scaling factor in topk_reduce kernel (#6220)" (#6968)

This commit is contained in:
Yineng Zhang
2025-06-07 21:02:49 -07:00
committed by GitHub
parent c2c4f57f63
commit 1fb76ebb93
10 changed files with 9 additions and 331 deletions

View File

@@ -317,7 +317,6 @@ class CompressedTensorsW8A8Fp8MoEMethod(CompressedTensorsMoEMethod):
a1_scale=layer.w13_input_scale,
a2_scale=layer.w2_input_scale,
apply_router_weight_on_input=apply_router_weight_on_input,
-routed_scaling_factor=routed_scaling_factor,
)