From c7a104c12bda83c2da12b91f189cf65639e34d37 Mon Sep 17 00:00:00 2001
From: Bowen Bao
Date: Sun, 5 Oct 2025 19:51:15 -0700
Subject: [PATCH] [quantization] Fix scale remapping for mllama4 (#10042)

Co-authored-by: HAI
---
 python/sglang/srt/models/mllama4.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/sglang/srt/models/mllama4.py b/python/sglang/srt/models/mllama4.py
index 72077d96a..4bc0e275f 100644
--- a/python/sglang/srt/models/mllama4.py
+++ b/python/sglang/srt/models/mllama4.py
@@ -700,7 +700,7 @@ class Llama4ForConditionalGeneration(nn.Module):
         """Handle scale parameter remapping. Returns True if handled."""
         if "scale" in name and "expert" not in name:
             remapped_name = maybe_remap_kv_scale_name(name, params_dict)
-            return remapped_name is None
+            return remapped_name is not None and remapped_name != name
         return False

     def _handle_stacked_params(