Fix shared experts fusion error (#6289)
This commit is contained in:
@@ -1903,23 +1903,49 @@ class DeepseekV2ForCausalLM(nn.Module):
|
|||||||
if self.n_share_experts_fusion > 0:
|
if self.n_share_experts_fusion > 0:
|
||||||
weights_list = list(weights)
|
weights_list = list(weights)
|
||||||
weights_dict = dict(weights_list)
|
weights_dict = dict(weights_list)
|
||||||
if self.quant_config is None or self.quant_config.get_name() == "w8a8_int8":
|
if self.quant_config is not None:
|
||||||
suffix_list = [
|
if self.quant_config.get_name() == "w8a8_int8":
|
||||||
"down_proj.weight",
|
suffix_list = [
|
||||||
"down_proj.weight_scale",
|
"down_proj.weight",
|
||||||
"gate_proj.weight",
|
"down_proj.weight_scale",
|
||||||
"gate_proj.weight_scale",
|
"gate_proj.weight",
|
||||||
"up_proj.weight",
|
"gate_proj.weight_scale",
|
||||||
"up_proj.weight_scale",
|
"up_proj.weight",
|
||||||
]
|
"up_proj.weight_scale",
|
||||||
|
]
|
||||||
|
elif (
|
||||||
|
self.quant_config.get_name() == "fp8"
|
||||||
|
or self.quant_config.get_name() == "blockwise_int8"
|
||||||
|
):
|
||||||
|
suffix_list = [
|
||||||
|
"down_proj.weight",
|
||||||
|
"down_proj.weight_scale_inv",
|
||||||
|
"gate_proj.weight",
|
||||||
|
"gate_proj.weight_scale_inv",
|
||||||
|
"up_proj.weight",
|
||||||
|
"up_proj.weight_scale_inv",
|
||||||
|
]
|
||||||
|
elif self.quant_config.get_name() == "awq":
|
||||||
|
suffix_list = [
|
||||||
|
"down_proj.qweight",
|
||||||
|
"down_proj.qzeros",
|
||||||
|
"down_proj.scales",
|
||||||
|
"gate_proj.qweight",
|
||||||
|
"gate_proj.qzeros",
|
||||||
|
"gate_proj.scales",
|
||||||
|
"up_proj.qweight",
|
||||||
|
"up_proj.qzeros",
|
||||||
|
"up_proj.scales",
|
||||||
|
]
|
||||||
|
else:
|
||||||
|
raise ValueError(
|
||||||
|
f"Unsupported shared expert fusion for quantization: {self.quant_config.get_name()}."
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
suffix_list = [
|
suffix_list = [
|
||||||
"down_proj.weight",
|
"down_proj.weight",
|
||||||
"down_proj.weight_scale_inv",
|
|
||||||
"gate_proj.weight",
|
"gate_proj.weight",
|
||||||
"gate_proj.weight_scale_inv",
|
|
||||||
"up_proj.weight",
|
"up_proj.weight",
|
||||||
"up_proj.weight_scale_inv",
|
|
||||||
]
|
]
|
||||||
names_to_remove = []
|
names_to_remove = []
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user