Support XiaomiMiMo/MiMo model inference (#5921)

This commit is contained in:
ryang
2025-05-01 22:41:13 +08:00
committed by GitHub
parent 9858113c33
commit 4322c31e24
2 changed files with 172 additions and 0 deletions

View File

@@ -107,6 +107,7 @@ class FlashInferAttnBackend(AttentionBackend):
# Models whose architectures appear below are given an enlarged FlashInfer
# workspace (512 MiB). "MiMoForCausalLM" was added here to support
# XiaomiMiMo/MiMo model inference (#5921); it joins the existing Qwen2/Qwen3
# entries, which presumably share the same workspace requirement — TODO confirm
# against FlashInfer's default workspace size.
if (
"Qwen2ForCausalLM" in model_runner.model_config.hf_config.architectures
or "Qwen3ForCausalLM" in model_runner.model_config.hf_config.architectures
or "MiMoForCausalLM" in model_runner.model_config.hf_config.architectures
):
# 512 * 1024 * 1024 bytes = 512 MiB workspace for FlashInfer kernels.
global_config.flashinfer_workspace_size = 512 * 1024 * 1024