Support XiaomiMiMo inference with MTP (multi-token prediction) (#6059)

This commit is contained in:
ryang
2025-05-23 05:14:49 +08:00
committed by GitHub
parent 0b07c4a99f
commit a6ae3af15e
6 changed files with 344 additions and 6 deletions

View File

@@ -73,6 +73,7 @@ class ModelConfig:
model_override_args=self.model_override_args,
**kwargs,
)
self.hf_text_config = get_hf_text_config(self.hf_config)
self.attention_chunk_size = getattr(
self.hf_text_config, "attention_chunk_size", None
@@ -97,6 +98,8 @@ class ModelConfig:
):
self.hf_config.architectures[0] = "DeepseekV3ForCausalLMNextN"
if is_draft_model and self.hf_config.architectures[0] == "MiMoForCausalLM":
self.hf_config.architectures[0] = "MiMoMTP"
# Check model type
self.is_generation = is_generation_model(
self.hf_config.architectures, is_embedding