Fix MoE-EP accuracy issue for FP8 (#2489)

This commit is contained in:
xiaobochen
2024-12-16 20:54:02 +08:00
committed by GitHub
parent a0592c059f
commit b532a5fd16

View File

@@ -644,6 +644,10 @@ class Fp8EPMoEMethod(Fp8MoEMethod):
"QuantConfig has static quantization, but found "
"activation scales are None."
)
layer.w13_weight_scale = torch.nn.Parameter(
torch.max(layer.w13_weight_scale, dim=1).values,
requires_grad=False,
)
return
def apply(