diff --git a/docs/supported_models/multimodal_language_models.md b/docs/supported_models/multimodal_language_models.md
index a691c0967..665d8de7e 100644
--- a/docs/supported_models/multimodal_language_models.md
+++ b/docs/supported_models/multimodal_language_models.md
@@ -38,3 +38,4 @@ in the GitHub search bar.
 | **Kimi-VL** (A3B) | `moonshotai/Kimi-VL-A3B-Instruct` | `kimi-vl` | Kimi-VL is a multimodal model that can understand and generate text from images. |
 | **Mistral-Small-3.1-24B** | `mistralai/Mistral-Small-3.1-24B-Instruct-2503` | `mistral` | Mistral 3.1 is a multimodal model that can generate text from text or images input. It also supports tool calling and structured output. |
 | **Phi-4-multimodal-instruct** | `microsoft/Phi-4-multimodal-instruct` | `phi-4-mm` | Phi-4-multimodal-instruct is the multimodal variant of the Phi-4-mini model, enhanced with LoRA for improved multimodal capabilities. Currently, it supports only text and vision modalities in SGLang. |
+| **MiMo-VL** (7B) | `XiaomiMiMo/MiMo-VL-7B-RL` | `mimo-vl` | Xiaomi's compact yet powerful vision-language model featuring a native resolution ViT encoder for fine-grained visual details, an MLP projector for cross-modal alignment, and the MiMo-7B language model optimized for complex reasoning tasks. |
diff --git a/python/sglang/srt/conversation.py b/python/sglang/srt/conversation.py
index c2608f1f1..facd82637 100644
--- a/python/sglang/srt/conversation.py
+++ b/python/sglang/srt/conversation.py
@@ -921,6 +921,19 @@ register_conv_template(
     )
 )
 
+register_conv_template(
+    Conversation(
+        name="mimo-vl",
+        system_message="You are MiMo, an AI assistant developed by Xiaomi.",
+        system_template="<|im_start|>system\n{system_message}",
+        roles=("<|im_start|>user", "<|im_start|>assistant"),
+        sep="<|im_end|>\n",
+        sep_style=SeparatorStyle.ADD_NEW_LINE_SINGLE,
+        stop_str=["<|im_end|>"],
+        image_token="<|vision_start|><|image_pad|><|vision_end|>",
+    )
+)
+
 
 register_conv_template(
     Conversation(
@@ -1049,3 +1062,9 @@ def match_phi_4_mm(model_path: str):
 def match_vila(model_path: str):
     if re.search(r"vila", model_path, re.IGNORECASE):
         return "chatml"
+
+
+@register_conv_template_matching_function
+def match_mimo_vl(model_path: str):
+    if re.search(r"mimo.*vl", model_path, re.IGNORECASE):
+        return "mimo-vl"
diff --git a/test/srt/test_vision_openai_server_a.py b/test/srt/test_vision_openai_server_a.py
index a4a2e770d..5c30eec1c 100644
--- a/test/srt/test_vision_openai_server_a.py
+++ b/test/srt/test_vision_openai_server_a.py
@@ -185,5 +185,25 @@ class TestMinicpmoServer(TestOpenAIVisionServer):
         self._test_audio_ambient_completion()
 
 
+class TestMimoVLServer(TestOpenAIVisionServer):
+    @classmethod
+    def setUpClass(cls):
+        cls.model = "XiaomiMiMo/MiMo-VL-7B-RL"
+        cls.base_url = DEFAULT_URL_FOR_TEST
+        cls.api_key = "sk-123456"
+        cls.process = popen_launch_server(
+            cls.model,
+            cls.base_url,
+            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
+            api_key=cls.api_key,
+            other_args=[
+                "--trust-remote-code",
+                "--mem-fraction-static",
+                "0.6",
+            ],
+        )
+        cls.base_url += "/v1"
+
+
 if __name__ == "__main__":
     unittest.main()