fix: correct scale parameter remapping logic in Llama4ForConditionalGeneration (#11282)
This commit is contained in:
@@ -710,7 +710,7 @@ class Llama4ForConditionalGeneration(nn.Module):
         """Handle scale parameter remapping. Returns True if handled."""
         if "scale" in name and "expert" not in name:
             remapped_name = maybe_remap_kv_scale_name(name, params_dict)
-            return remapped_name is not None and remapped_name != name
+            return remapped_name != name
         return False
 
     def _handle_stacked_params(
Reference in New Issue
Block a user