Fix MoE-EP accuracy issue for FP8 (#2489)
This commit is contained in:
@@ -644,6 +644,10 @@ class Fp8EPMoEMethod(Fp8MoEMethod):
|
|||||||
"QuantConfig has static quantization, but found "
|
"QuantConfig has static quantization, but found "
|
||||||
"activation scales are None."
|
"activation scales are None."
|
||||||
)
|
)
|
||||||
|
layer.w13_weight_scale = torch.nn.Parameter(
|
||||||
|
torch.max(layer.w13_weight_scale, dim=1).values,
|
||||||
|
requires_grad=False,
|
||||||
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
def apply(
|
def apply(
|
||||||
|
|||||||
Reference in New Issue
Block a user