model(vlm): pixtral (#5084)

This commit is contained in:
Kiv Chen
2025-05-13 00:16:10 -07:00
committed by GitHub
parent b2e95f62b4
commit 5380cd7ea3
16 changed files with 1125 additions and 39 deletions

View File

@@ -14,14 +14,15 @@
"""
Usage:
To test a specific model:
1. Add it to ALL_OTHER_MODELS
2. Run `ONLY_RUN=Qwen/Qwen2-1.5B python3 -m unittest test_generation_models.TestGenerationModels.test_others`
To test a specific model locally:
1. Add it to ALL_MODELS, for example, `ModelCase("Qwen/Qwen2-1.5B")`
2. Run `ONLY_RUN=Qwen/Qwen2-1.5B python3 -m unittest test_generation_models.TestGenerationModels`
"""
import dataclasses
import multiprocessing as mp
import os
import random
import unittest
from typing import List
@@ -53,8 +54,9 @@ CI_MODELS = [
ModelCase("google/gemma-2-2b"),
]
# All other models that do not run on the CI
ALL_OTHER_MODELS = [
# The complete set of models used to test sglang's generation models
ALL_MODELS = [
*CI_MODELS,
ModelCase("Qwen/Qwen2-1.5B"),
ModelCase("Qwen/Qwen2.5-14B-Instruct"),
ModelCase("HuggingFaceTB/SmolLM-135M-Instruct", skip_long_prompt=True),
@@ -63,7 +65,7 @@ ALL_OTHER_MODELS = [
"THUDM/glm-4-9b-chat", tp_size=2, trust_remote_code=True, skip_long_prompt=True
),
ModelCase("openai-community/gpt2"),
ModelCase("microsoft/Phi-3-small-8k-instruct"),
ModelCase("microsoft/Phi-3-small-8k-instruct", trust_remote_code=True),
ModelCase("allenai/OLMo-2-1124-7B-Instruct", skip_long_prompt=True),
ModelCase("ibm-granite/granite-3.0-2b-instruct", skip_long_prompt=True),
]
@@ -117,9 +119,30 @@ class TestGenerationModels(CustomTestCase):
debug_text=f"model_path={model_path} prompts={prompts}",
)
@unittest.skipIf(not is_in_ci(), "Local test should run all models")
def test_ci_models(self):
    """Compare logits and output strings for every CI model and dtype.

    The test is skipped whenever ``is_in_ci()`` is falsy. For each entry of
    ``CI_MODELS`` and each entry of ``TORCH_DTYPES`` it calls
    ``assert_close_logits_and_output_strs`` with the selected prompts.
    """
    for case in CI_MODELS:
        for dtype in TORCH_DTYPES:
            # Models flagged with skip_long_prompt only get the prompts
            # shorter than 1000 characters; all others get the full set.
            prompts = (
                [text for text in DEFAULT_PROMPTS if len(text) < 1000]
                if case.skip_long_prompt
                else DEFAULT_PROMPTS
            )

            # Assert the logits and output strs are close
            self.assert_close_logits_and_output_strs(prompts, case, dtype)
@unittest.skipIf(is_in_ci(), "CI only runs selected models for simplicity")
def test_all_models(self):
for model_case in ALL_MODELS:
for torch_dtype in TORCH_DTYPES:
if (
"ONLY_RUN" in os.environ
and os.environ["ONLY_RUN"] != model_case.model_path
):
continue
# Skip long prompts for models that do not have a long context
prompts = DEFAULT_PROMPTS
@@ -131,26 +154,6 @@ class TestGenerationModels(CustomTestCase):
prompts, model_case, torch_dtype
)
def test_others(self):
    """Compare logits and output strings for ``ALL_OTHER_MODELS`` in float16.

    Returns immediately when ``is_in_ci()`` is truthy. When the ``ONLY_RUN``
    environment variable is set, only the model whose ``model_path`` equals
    its value is exercised; every other entry is skipped.
    """
    if is_in_ci():
        return

    for case in ALL_OTHER_MODELS:
        # Only run a specified model (looked up per iteration, as before)
        requested = os.environ.get("ONLY_RUN")
        if requested is not None and requested != case.model_path:
            continue

        # Skip long prompts for models that do not have a long context
        if case.skip_long_prompt:
            prompts = [text for text in DEFAULT_PROMPTS if len(text) < 1000]
        else:
            prompts = DEFAULT_PROMPTS

        # Assert the logits and output strs are close
        self.assert_close_logits_and_output_strs(prompts, case, torch.float16)
# Run the unittest command-line runner when this file is executed as a script.
if __name__ == "__main__":
unittest.main()

View File

@@ -642,6 +642,28 @@ class TestMinicpmoServer(TestOpenAIVisionServer):
self._test_audio_ambient_completion()
class TestPixtralServer(TestOpenAIVisionServer):
    """Vision-server test case for the ``mistral-community/pixtral-12b`` model."""

    @classmethod
    def setUpClass(cls):
        """Launch the server process and record its connection settings.

        Stores the model name, base URL, API key and the handle returned by
        ``popen_launch_server`` on the class, then appends ``/v1`` to the
        base URL once the launch call has returned.
        """
        cls.model = "mistral-community/pixtral-12b"
        cls.base_url = DEFAULT_URL_FOR_TEST
        cls.api_key = "sk-123456"

        # Extra command-line flags handed to the launched server.
        launch_args = [
            "--trust-remote-code",
            "--mem-fraction-static",
            "0.73",
        ]
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            other_args=launch_args,
        )

        # The launch call receives the bare URL; "/v1" is added afterwards.
        cls.base_url = cls.base_url + "/v1"

    def test_video_chat_completion(self):
        """Intentionally empty.

        NOTE(review): presumably overrides an inherited video test so that
        it is a no-op for this model -- confirm against the base class.
        """
        pass
class TestDeepseekVL2Server(TestOpenAIVisionServer):
@classmethod
def setUpClass(cls):