fix: correct scale parameter remapping logic in Llama4ForConditionalGeneration (#11282)
This commit is contained in:
@@ -710,7 +710,7 @@ class Llama4ForConditionalGeneration(nn.Module):
         """Handle scale parameter remapping. Returns True if handled."""
         if "scale" in name and "expert" not in name:
             remapped_name = maybe_remap_kv_scale_name(name, params_dict)
-            return remapped_name is not None and remapped_name != name
+            return remapped_name != name
         return False
 
     def _handle_stacked_params(
Reference in New Issue
Block a user