[quantization] Fix scale remapping for mllama4 (#10042)

Co-authored-by: HAI <hixiao@gmail.com>
This commit is contained in:
Bowen Bao
2025-10-05 19:51:15 -07:00
committed by GitHub
parent 97d966a7f8
commit c7a104c12b

View File

@@ -700,7 +700,7 @@ class Llama4ForConditionalGeneration(nn.Module):
"""Handle scale parameter remapping. Returns True if handled."""
if "scale" in name and "expert" not in name:
remapped_name = maybe_remap_kv_scale_name(name, params_dict)
-            return remapped_name is None
+            return remapped_name is not None and remapped_name != name
return False
def _handle_stacked_params(