fix: correct scale parameter remapping logic in Llama4ForConditionalGeneration (#11282)

This commit is contained in:
Xinyuan Tong
2025-10-06 17:28:23 -07:00
committed by GitHub
parent afc35ccc5e
commit fd8a0b29c0

View File

@@ -710,7 +710,7 @@ class Llama4ForConditionalGeneration(nn.Module):
"""Handle scale parameter remapping. Returns True if handled."""
if "scale" in name and "expert" not in name:
remapped_name = maybe_remap_kv_scale_name(name, params_dict)
-return remapped_name is not None and remapped_name != name
+return remapped_name != name
return False
def _handle_stacked_params(