Fuse routed scaling factor in deepseek (#6970)

This commit is contained in:
Xiaoyu Zhang
2025-06-09 06:24:24 +08:00
committed by GitHub
parent 971a0dfa32
commit 3712abfaf9
10 changed files with 338 additions and 15 deletions

View File

@@ -317,6 +317,7 @@ class CompressedTensorsW8A8Fp8MoEMethod(CompressedTensorsMoEMethod):
a1_scale=layer.w13_input_scale,
a2_scale=layer.w2_input_scale,
apply_router_weight_on_input=apply_router_weight_on_input,
routed_scaling_factor=routed_scaling_factor,
)