Fix MoE-EP accuracy issue for FP8 (#2489)
This commit is contained in:
@@ -644,6 +644,10 @@ class Fp8EPMoEMethod(Fp8MoEMethod):
|
||||
"QuantConfig has static quantization, but found "
|
||||
"activation scales are None."
|
||||
)
|
||||
layer.w13_weight_scale = torch.nn.Parameter(
|
||||
torch.max(layer.w13_weight_scale, dim=1).values,
|
||||
requires_grad=False,
|
||||
)
|
||||
return
|
||||
|
||||
def apply(
|
||||
|
||||
Reference in New Issue
Block a user