Fix GLM4_MOE launch with compressed_tensors quant model (#8456)

This commit is contained in:
Minglei Zhu
2025-07-28 01:31:20 -07:00
committed by GitHub
parent 581e7dcb92
commit 25f73c6cf3

View File

@@ -795,6 +795,7 @@ class Glm4MoeForCausalLM(DeepseekV2ForCausalLM):
elif (
self.quant_config.get_name() == "fp8"
or self.quant_config.get_name() == "blockwise_int8"
or self.quant_config.get_name() == "compressed_tensors"
):
suffix_list = [
"down_proj.weight",