Fix GLM4_MOE launch with compressed_tensors quant model (#8456)

This commit is contained in:
Minglei Zhu
2025-07-28 01:31:20 -07:00
committed by GitHub
parent 581e7dcb92
commit 25f73c6cf3

View File

@@ -795,6 +795,7 @@ class Glm4MoeForCausalLM(DeepseekV2ForCausalLM):
elif (
self.quant_config.get_name() == "fp8"
or self.quant_config.get_name() == "blockwise_int8"
or self.quant_config.get_name() == "compressed_tensors"
):
suffix_list = [
"down_proj.weight",