From 25f73c6cf3c2b20441266693ad12030157c1cbef Mon Sep 17 00:00:00 2001
From: Minglei Zhu
Date: Mon, 28 Jul 2025 01:31:20 -0700
Subject: [PATCH] fix GLM4_MOE launch with compressed_tensor quant model (#8456)

---
 python/sglang/srt/models/glm4_moe.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/sglang/srt/models/glm4_moe.py b/python/sglang/srt/models/glm4_moe.py
index 9716557f4..f080beb50 100644
--- a/python/sglang/srt/models/glm4_moe.py
+++ b/python/sglang/srt/models/glm4_moe.py
@@ -795,6 +795,7 @@ class Glm4MoeForCausalLM(DeepseekV2ForCausalLM):
                 elif (
                     self.quant_config.get_name() == "fp8"
                     or self.quant_config.get_name() == "blockwise_int8"
+                    or self.quant_config.get_name() == "compressed_tensors"
                 ):
                     suffix_list = [
                         "down_proj.weight",