Fix gptq for moe layers (#2300)

Author: Lianmin Zheng
Date: 2024-12-03 07:12:33 -08:00 (committed via GitHub)
Co-authored-by: root <me@zhyncs.com>
Commit: 1228f7ca69, parent fda628d8f2
2 changed files with 44 additions and 2 deletions

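The fix extends the bias-skip predicate in MixtralForCausalLM.load_weights: GPTQ checkpoints can ship bias tensors that the quantized model has no parameter for, and after the fused-MoE name mapping those names can end in "_bias" rather than the usual ".bias", so both suffixes are now skipped when absent from params_dict. A minimal sketch of the predicate, using hypothetical checkpoint names for illustration only:

# A minimal sketch of the extended skip predicate; the tensor names
# below are hypothetical, not taken from a real checkpoint.
def should_skip(name: str, params_dict: dict) -> bool:
    # GPTQ checkpoints may carry bias tensors the model has no parameter
    # for; after the fused-MoE name mapping these can end in "_bias"
    # rather than the usual ".bias".
    return (name.endswith(".bias") or name.endswith("_bias")) and name not in params_dict

params_dict = {"model.layers.0.self_attn.qkv_proj.qweight": object()}
print(should_skip("model.layers.0.self_attn.qkv_proj.bias", params_dict))     # True
print(should_skip("model.layers.0.block_sparse_moe.w13_bias", params_dict))   # True
print(should_skip("model.layers.0.self_attn.qkv_proj.qweight", params_dict))  # False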

@@ -339,7 +339,9 @@ class MixtralForCausalLM(nn.Module):
                     continue
                 name = name.replace(weight_name, param_name)
                 # Skip loading extra bias for GPTQ models.
-                if name.endswith(".bias") and name not in params_dict:
+                if (
+                    name.endswith(".bias") or name.endswith("_bias")
+                ) and name not in params_dict:
                     continue
                 param = params_dict[name]
@@ -353,6 +355,10 @@ class MixtralForCausalLM(nn.Module):
                         continue
                     name = name.replace(weight_name, param_name)
+                    if (
+                        name.endswith(".bias") or name.endswith("_bias")
+                    ) and name not in params_dict:
+                        continue
                     param = params_dict[name]
                     weight_loader = param.weight_loader
                     weight_loader(
@@ -365,7 +371,9 @@ class MixtralForCausalLM(nn.Module):
                     break
                 else:
                     # Skip loading extra bias for GPTQ models.
-                    if name.endswith(".bias") and name not in params_dict:
+                    if (
+                        name.endswith(".bias") or name.endswith("_bias")
+                    ) and name not in params_dict:
                         continue
                     # Skip loading kv_scale from ckpts towards new design.
                     if name.endswith(".kv_scale") and name not in params_dict: