From c7a104c12bda83c2da12b91f189cf65639e34d37 Mon Sep 17 00:00:00 2001
From: Bowen Bao
Date: Sun, 5 Oct 2025 19:51:15 -0700
Subject: [PATCH] [quantization] Fix scale remapping for mllama4 (#10042)

Co-authored-by: HAI
---
 python/sglang/srt/models/mllama4.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/sglang/srt/models/mllama4.py b/python/sglang/srt/models/mllama4.py
index 72077d96a..4bc0e275f 100644
--- a/python/sglang/srt/models/mllama4.py
+++ b/python/sglang/srt/models/mllama4.py
@@ -700,7 +700,7 @@ class Llama4ForConditionalGeneration(nn.Module):
         """Handle scale parameter remapping. Returns True if handled."""
         if "scale" in name and "expert" not in name:
             remapped_name = maybe_remap_kv_scale_name(name, params_dict)
-            return remapped_name is None
+            return remapped_name is not None and remapped_name != name
         return False

     def _handle_stacked_params(