From 54dd3ea12277f782823c8067ed723279136c40bb Mon Sep 17 00:00:00 2001 From: HAI Date: Tue, 29 Oct 2024 13:58:03 -0700 Subject: [PATCH] =?UTF-8?q?[FP8=20KV=20Cache,=20Mixtral]=20Avoid=20KeyErro?= =?UTF-8?q?r=20at=20loading=20pre-quantized=20FP8=20m=E2=80=A6=20(#1835)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- python/sglang/srt/models/mixtral.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/python/sglang/srt/models/mixtral.py b/python/sglang/srt/models/mixtral.py index 6ad802367..dc4198b52 100644 --- a/python/sglang/srt/models/mixtral.py +++ b/python/sglang/srt/models/mixtral.py @@ -369,6 +369,9 @@ class MixtralForCausalLM(nn.Module): # Skip loading extra bias for GPTQ models. if name.endswith(".bias") and name not in params_dict: continue + # Skip loading kv_scale from ckpts towards new design. + if name.endswith(".kv_scale") and name not in params_dict: + continue if name is None: continue