Fix and clean up chat-template requirement for VLM (#6114)

Signed-off-by: Xinyuan Tong <justinning0323@outlook.com>
This commit is contained in:
XinyuanTong
2025-05-10 09:14:09 -07:00
committed by GitHub
parent c178abdabc
commit 9d8ec2e67e
16 changed files with 104 additions and 195 deletions

View File

@@ -19,17 +19,12 @@ from sglang.test.test_utils import (
# VLM models for testing
MODELS = [
SimpleNamespace(
model="google/gemma-3-27b-it", chat_template="gemma-it", mmmu_accuracy=0.45
),
SimpleNamespace(model="google/gemma-3-27b-it", mmmu_accuracy=0.45),
SimpleNamespace(
model="Qwen/Qwen2.5-VL-3B-Instruct",
chat_template="qwen2-vl",
mmmu_accuracy=0.4,
),
SimpleNamespace(
model="openbmb/MiniCPM-V-2_6", chat_template="minicpmv", mmmu_accuracy=0.4
),
SimpleNamespace(model="openbmb/MiniCPM-V-2_6", mmmu_accuracy=0.4),
]
@@ -50,7 +45,6 @@ class TestVLMModels(CustomTestCase):
def run_mmmu_eval(
self,
model_version: str,
chat_template: str,
output_path: str,
*,
env: dict | None = None,
@@ -69,11 +63,7 @@ class TestVLMModels(CustomTestCase):
os.makedirs(output_path, exist_ok=True)
# -------- compose --model_args --------
model_args = (
f'model_version="{model_version}",'
f'chat_template="{chat_template}",'
f"tp={tp}"
)
model_args = f'model_version="{model_version}",' f"tp={tp}"
# -------- build command list --------
cmd = [
@@ -122,8 +112,6 @@ class TestVLMModels(CustomTestCase):
timeout=self.time_out,
api_key=self.api_key,
other_args=[
"--chat-template",
model.chat_template,
"--trust-remote-code",
"--cuda-graph-max-bs",
"32",
@@ -134,7 +122,7 @@ class TestVLMModels(CustomTestCase):
)
# Run evaluation
self.run_mmmu_eval(model.model, model.chat_template, "./logs")
self.run_mmmu_eval(model.model, "./logs")
# Get the result file
result_file_path = glob.glob("./logs/*.json")[0]

View File

@@ -156,8 +156,6 @@ class TestBenchServing(CustomTestCase):
num_prompts=200,
request_rate=float("inf"),
other_server_args=[
"--chat-template",
DEFAULT_VLM_CHAT_TEMPLATE_FOR_TEST,
"--mem-fraction-static",
"0.7",
],
@@ -181,8 +179,6 @@ class TestBenchServing(CustomTestCase):
num_prompts=50,
request_rate=1,
other_server_args=[
"--chat-template",
DEFAULT_VLM_CHAT_TEMPLATE_FOR_TEST,
"--mem-fraction-static",
"0.7",
],

View File

@@ -29,10 +29,10 @@ from sglang.test.test_utils import (
)
VISION_MODELS = [
("unsloth/Qwen2.5-VL-7B-Instruct-bnb-4bit", "qwen2-vl"),
("unsloth/Qwen2-VL-7B-Instruct-bnb-4bit", "qwen2-vl"),
("unsloth/Llama-3.2-11B-Vision-Instruct-bnb-4bit", "llama_3_vision"),
("unsloth/Llama-3.2-11B-Vision-bnb-4bit", "llama_3_vision"),
"unsloth/Qwen2.5-VL-7B-Instruct-bnb-4bit",
"unsloth/Qwen2-VL-7B-Instruct-bnb-4bit",
"unsloth/Llama-3.2-11B-Vision-Instruct-bnb-4bit",
"unsloth/Llama-3.2-11B-Vision-bnb-4bit",
]
LANGUAGE_MODELS = [
"unsloth/Qwen2.5-7B-Instruct-bnb-4bit",
@@ -249,11 +249,9 @@ class TestVisionModel(CustomTestCase):
if is_in_ci():
models_to_test = [random.choice(VISION_MODELS)]
for model, template in models_to_test:
for model in models_to_test:
with self.subTest(model=model):
other_args = [
"--chat-template",
template,
"--mem-fraction-static",
"0.6",
"--load-format",

View File

@@ -688,7 +688,6 @@ class TestOpenAIServerIgnoreEOS(CustomTestCase):
cls.base_url,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
api_key=cls.api_key,
other_args=["--chat-template=llama_3_vision"],
)
cls.base_url += "/v1"
cls.tokenizer = get_tokenizer(DEFAULT_SMALL_MODEL_NAME_FOR_TEST)

View File

@@ -614,7 +614,7 @@ class TestInternVL2_5Server(TestOpenAIVisionServer):
cls.model,
cls.base_url,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
other_args=["--trust-remote-code", "--chat-template", "internvl-2-5"],
other_args=["--trust-remote-code"],
)
cls.base_url += "/v1"
@@ -676,8 +676,6 @@ class TestDeepseekVL2TinyServer(TestOpenAIVisionServer):
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
other_args=[
"--trust-remote-code",
"--chat-template",
"deepseek-vl2",
"--context-length",
"4096",
],
@@ -775,8 +773,6 @@ class TestKimiVLServer(TestOpenAIVisionServer):
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
other_args=[
"--trust-remote-code",
"--chat-template",
"kimi-vl",
"--context-length",
"4096",
"--dtype",