model(vlm): pixtral (#5084)

2025-05-13 00:16:10 -07:00
parent b2e95f62b4
commit 5380cd7ea3
16 changed files with 1125 additions and 39 deletions
--- a/test/srt/models/test_generation_models.py
+++ b/test/srt/models/test_generation_models.py
@@ -14,14 +14,15 @@
 """
 Usage:

-To test a specific model:
-1. Add it to ALL_OTHER_MODELS
-2. Run `ONLY_RUN=Qwen/Qwen2-1.5B python3 -m unittest test_generation_models.TestGenerationModels.test_others`
+To test a specific model locally:
+1. Add it to ALL_MODELS, for example, `ModelCase("Qwen/Qwen2-1.5B")`
+2. Run `ONLY_RUN=Qwen/Qwen2-1.5B python3 -m unittest test_generation_models.TestGenerationModels`
 """

 import dataclasses
 import multiprocessing as mp
 import os
+import random
 import unittest
 from typing import List

@@ -53,8 +54,9 @@ CI_MODELS = [
    ModelCase("google/gemma-2-2b"),
 ]

-# All other models that do not run on the CI
-ALL_OTHER_MODELS = [
+# The complete set of generation models to test with sglang, including CI_MODELS
+ALL_MODELS = [
+    *CI_MODELS,
    ModelCase("Qwen/Qwen2-1.5B"),
    ModelCase("Qwen/Qwen2.5-14B-Instruct"),
    ModelCase("HuggingFaceTB/SmolLM-135M-Instruct", skip_long_prompt=True),
@@ -63,7 +65,7 @@ ALL_OTHER_MODELS = [
        "THUDM/glm-4-9b-chat", tp_size=2, trust_remote_code=True, skip_long_prompt=True
    ),
    ModelCase("openai-community/gpt2"),
-    ModelCase("microsoft/Phi-3-small-8k-instruct"),
+    ModelCase("microsoft/Phi-3-small-8k-instruct", trust_remote_code=True),
    ModelCase("allenai/OLMo-2-1124-7B-Instruct", skip_long_prompt=True),
    ModelCase("ibm-granite/granite-3.0-2b-instruct", skip_long_prompt=True),
 ]
@@ -117,9 +119,30 @@ class TestGenerationModels(CustomTestCase):
            debug_text=f"model_path={model_path} prompts={prompts}",
        )

+    @unittest.skipIf(not is_in_ci(), "Local test should run all models")
    def test_ci_models(self):  # skipped when is_in_ci() is false; checks every CI_MODELS entry with every dtype in TORCH_DTYPES
        for model_case in CI_MODELS:
            for torch_dtype in TORCH_DTYPES:
+                prompts = DEFAULT_PROMPTS
+
+                # Skip long prompts for models that do not have a long context
+                if model_case.skip_long_prompt:
+                    prompts = [p for p in DEFAULT_PROMPTS if len(p) < 1000]  # drops any prompt of length 1000 or more
+
+                # Assert the logits and output strs are close
+                self.assert_close_logits_and_output_strs(
+                    prompts, model_case, torch_dtype
+                )
+
+    @unittest.skipIf(is_in_ci(), "CI only runs selected models for simplicity")
+    def test_all_models(self):
+        for model_case in ALL_MODELS:
+            for torch_dtype in TORCH_DTYPES:
+                if (
+                    "ONLY_RUN" in os.environ
+                    and os.environ["ONLY_RUN"] != model_case.model_path
+                ):
+                    continue

                # Skip long prompts for models that do not have a long context
                prompts = DEFAULT_PROMPTS
@@ -131,26 +154,6 @@ class TestGenerationModels(CustomTestCase):
                    prompts, model_case, torch_dtype
                )

-    def test_others(self):
-        if is_in_ci():
-            return
-
-        for model_case in ALL_OTHER_MODELS:
-            # Only run a specified model
-            if (
-                "ONLY_RUN" in os.environ
-                and os.environ["ONLY_RUN"] != model_case.model_path
-            ):
-                continue
-
-            # Skip long prompts for models that do not have a long context
-            prompts = DEFAULT_PROMPTS
-            if model_case.skip_long_prompt:
-                prompts = [p for p in DEFAULT_PROMPTS if len(p) < 1000]
-
-            # Assert the logits and output strs are close
-            self.assert_close_logits_and_output_strs(prompts, model_case, torch.float16)
-

 if __name__ == "__main__":
    unittest.main()
--- a/test/srt/test_vision_openai_server.py
+++ b/test/srt/test_vision_openai_server.py
@@ -642,6 +642,28 @@ class TestMinicpmoServer(TestOpenAIVisionServer):
        self._test_audio_ambient_completion()


+class TestPixtralServer(TestOpenAIVisionServer):  # TestOpenAIVisionServer subclass configured for mistral-community/pixtral-12b
+    @classmethod
+    def setUpClass(cls):
+        cls.model = "mistral-community/pixtral-12b"
+        cls.base_url = DEFAULT_URL_FOR_TEST
+        cls.api_key = "sk-123456"
+        cls.process = popen_launch_server(  # server process handle is kept on the class
+            cls.model,
+            cls.base_url,
+            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
+            other_args=[
+                "--trust-remote-code",
+                "--mem-fraction-static",
+                "0.73",  # value passed to --mem-fraction-static
+            ],
+        )
+        cls.base_url += "/v1"  # suffix appended only after launch; the server itself is started with the bare URL
+
+    def test_video_chat_completion(self):
+        pass  # NOTE(review): presumably overrides an inherited video test to turn it into a no-op for this model — confirm
+
+
 class TestDeepseekVL2Server(TestOpenAIVisionServer):
    @classmethod
    def setUpClass(cls):