fix GLM4_MOE launch with compressed_tensor quant model (#8456)
This commit is contained in:
@@ -795,6 +795,7 @@ class Glm4MoeForCausalLM(DeepseekV2ForCausalLM):
|
|||||||
elif (
|
elif (
|
||||||
self.quant_config.get_name() == "fp8"
|
self.quant_config.get_name() == "fp8"
|
||||||
or self.quant_config.get_name() == "blockwise_int8"
|
or self.quant_config.get_name() == "blockwise_int8"
|
||||||
|
or self.quant_config.get_name() == "compressed_tensors"
|
||||||
):
|
):
|
||||||
suffix_list = [
|
suffix_list = [
|
||||||
"down_proj.weight",
|
"down_proj.weight",
|
||||||
|
|||||||
Reference in New Issue
Block a user