Support XiaomiMiMo/MiMo model inference (#5921)

This commit is contained in:
ryang
2025-05-01 22:41:13 +08:00
committed by GitHub
parent 9858113c33
commit 4322c31e24
2 changed files with 172 additions and 0 deletions

View File

@@ -107,6 +107,7 @@ class FlashInferAttnBackend(AttentionBackend):
# Models whose architectures appear below are given an enlarged FlashInfer
# workspace (512 MiB). "MiMoForCausalLM" was added here to support
# XiaomiMiMo/MiMo model inference (#5921); it joins the existing Qwen2/Qwen3
# entries, which presumably share the same workspace requirement — TODO confirm
# against FlashInfer's default workspace size.
if (
"Qwen2ForCausalLM" in model_runner.model_config.hf_config.architectures
or "Qwen3ForCausalLM" in model_runner.model_config.hf_config.architectures
or "MiMoForCausalLM" in model_runner.model_config.hf_config.architectures
):
# 512 * 1024 * 1024 bytes = 512 MiB workspace for FlashInfer kernels.
global_config.flashinfer_workspace_size = 512 * 1024 * 1024