diff --git a/vllm_ascend/quantization/quant_config.py b/vllm_ascend/quantization/quant_config.py
index 339e6f00..1490f394 100644
--- a/vllm_ascend/quantization/quant_config.py
+++ b/vllm_ascend/quantization/quant_config.py
@@ -117,6 +117,18 @@ class AscendQuantConfig(QuantizationConfig):
                          prefix: str) -> Optional["QuantizeMethodBase"]:
         vllm_config = get_current_vllm_config()
         model_type = vllm_config.model_config.hf_text_config.model_type
+
+        if model_type in ["minimax", "minimax_m2"]:
+            prefix = prefix.replace("mlp", "block_sparse_moe")
+
+            # To adapt to MiniMax, modify the prefix of the model layer name
+            parts = prefix.split('.')
+            if "experts" in parts and len(parts) > 2:
+                exp_idx = parts.index("experts")
+                if exp_idx + 1 < len(parts) and parts[exp_idx + 1].isdigit():
+                    parts = parts[:exp_idx + 1]
+                    prefix = ".".join(parts)
+
         if model_type in packed_modules_model_mapping:
             self.packed_modules_mapping = packed_modules_model_mapping[
                 model_type]
@@ -312,6 +324,14 @@ packed_modules_model_mapping = {
         ["experts.0.gate_proj", "experts.0.up_proj", "experts.0.down_proj"],
         "fused_qkv_a_proj": ["q_a_proj", "kv_a_proj_with_mqa"]
     },
+    "minimax_m2": {
+        "qkv_proj": [
+            "q_proj",
+            "k_proj",
+            "v_proj",
+        ],
+        "experts": ["experts.0.w1", "experts.0.w2", "experts.0.w3"]
+    }
 }
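
For reference, below is a minimal standalone sketch (not part of the patch) of what the added prefix rewrite does. The example prefixes are hypothetical, assuming MiniMax layer names of the form "model.layers.<i>.mlp.experts.<j>.<proj>":

# sketch.py -- illustrates the prefix rewrite added in get_quant_method;
# the prefixes below are hypothetical examples, not taken from the patch.
def rewrite_minimax_prefix(prefix: str) -> str:
    # MiniMax checkpoints name the MoE block "block_sparse_moe", not "mlp".
    prefix = prefix.replace("mlp", "block_sparse_moe")
    # Collapse a per-expert suffix ("experts.<idx>. ...") down to "experts"
    # so a single packed-modules entry covers every expert.
    parts = prefix.split('.')
    if "experts" in parts and len(parts) > 2:
        exp_idx = parts.index("experts")
        if exp_idx + 1 < len(parts) and parts[exp_idx + 1].isdigit():
            prefix = ".".join(parts[:exp_idx + 1])
    return prefix

if __name__ == "__main__":
    # Expert weight: the expert index and projection name are stripped.
    assert (rewrite_minimax_prefix("model.layers.0.mlp.experts.3.w1")
            == "model.layers.0.block_sparse_moe.experts")
    # Non-expert layer: only the "mlp" -> "block_sparse_moe" rename applies.
    assert (rewrite_minimax_prefix("model.layers.0.mlp.gate")
            == "model.layers.0.block_sparse_moe.gate")

Collapsing every "experts.<idx>.<proj>" prefix to a single "experts" key is what lets the one "experts" entry in packed_modules_model_mapping match all experts in the layer.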