[quantization] Fix scale remapping for mllama4 (#10042)

Co-authored-by: HAI <hixiao@gmail.com>
This commit is contained in:
Bowen Bao
2025-10-05 19:51:15 -07:00
committed by GitHub
parent 97d966a7f8
commit c7a104c12b

View File

@@ -700,7 +700,7 @@ class Llama4ForConditionalGeneration(nn.Module):
"""Handle scale parameter remapping. Returns True if handled."""
if "scale" in name and "expert" not in name:
remapped_name = maybe_remap_kv_scale_name(name, params_dict)
-            return remapped_name is None
+            return remapped_name is not None and remapped_name != name
return False
def _handle_stacked_params(