Fix and clean up chat-template requirement for VLM (#6114)

Signed-off-by: Xinyuan Tong <justinning0323@outlook.com>
This commit is contained in:
XinyuanTong
2025-05-10 09:14:09 -07:00
committed by GitHub
parent c178abdabc
commit 9d8ec2e67e
16 changed files with 104 additions and 195 deletions

View File

@@ -19,17 +19,12 @@ from sglang.test.test_utils import (
# VLM models for testing
MODELS = [
SimpleNamespace(
model="google/gemma-3-27b-it", chat_template="gemma-it", mmmu_accuracy=0.45
),
SimpleNamespace(model="google/gemma-3-27b-it", mmmu_accuracy=0.45),
SimpleNamespace(
model="Qwen/Qwen2.5-VL-3B-Instruct",
chat_template="qwen2-vl",
mmmu_accuracy=0.4,
),
SimpleNamespace(
model="openbmb/MiniCPM-V-2_6", chat_template="minicpmv", mmmu_accuracy=0.4
),
SimpleNamespace(model="openbmb/MiniCPM-V-2_6", mmmu_accuracy=0.4),
]
@@ -50,7 +45,6 @@ class TestVLMModels(CustomTestCase):
def run_mmmu_eval(
self,
model_version: str,
chat_template: str,
output_path: str,
*,
env: dict | None = None,
@@ -69,11 +63,7 @@ class TestVLMModels(CustomTestCase):
os.makedirs(output_path, exist_ok=True)
# -------- compose --model_args --------
model_args = (
f'model_version="{model_version}",'
f'chat_template="{chat_template}",'
f"tp={tp}"
)
model_args = f'model_version="{model_version}",' f"tp={tp}"
# -------- build command list --------
cmd = [
@@ -122,8 +112,6 @@ class TestVLMModels(CustomTestCase):
timeout=self.time_out,
api_key=self.api_key,
other_args=[
"--chat-template",
model.chat_template,
"--trust-remote-code",
"--cuda-graph-max-bs",
"32",
@@ -134,7 +122,7 @@ class TestVLMModels(CustomTestCase):
)
# Run evaluation
self.run_mmmu_eval(model.model, model.chat_template, "./logs")
self.run_mmmu_eval(model.model, "./logs")
# Get the result file
result_file_path = glob.glob("./logs/*.json")[0]

View File

@@ -156,8 +156,6 @@ class TestBenchServing(CustomTestCase):
num_prompts=200,
request_rate=float("inf"),
other_server_args=[
"--chat-template",
DEFAULT_VLM_CHAT_TEMPLATE_FOR_TEST,
"--mem-fraction-static",
"0.7",
],
@@ -181,8 +179,6 @@ class TestBenchServing(CustomTestCase):
num_prompts=50,
request_rate=1,
other_server_args=[
"--chat-template",
DEFAULT_VLM_CHAT_TEMPLATE_FOR_TEST,
"--mem-fraction-static",
"0.7",
],

View File

@@ -29,10 +29,10 @@ from sglang.test.test_utils import (
)
VISION_MODELS = [
("unsloth/Qwen2.5-VL-7B-Instruct-bnb-4bit", "qwen2-vl"),
("unsloth/Qwen2-VL-7B-Instruct-bnb-4bit", "qwen2-vl"),
("unsloth/Llama-3.2-11B-Vision-Instruct-bnb-4bit", "llama_3_vision"),
("unsloth/Llama-3.2-11B-Vision-bnb-4bit", "llama_3_vision"),
"unsloth/Qwen2.5-VL-7B-Instruct-bnb-4bit",
"unsloth/Qwen2-VL-7B-Instruct-bnb-4bit",
"unsloth/Llama-3.2-11B-Vision-Instruct-bnb-4bit",
"unsloth/Llama-3.2-11B-Vision-bnb-4bit",
]
LANGUAGE_MODELS = [
"unsloth/Qwen2.5-7B-Instruct-bnb-4bit",
@@ -249,11 +249,9 @@ class TestVisionModel(CustomTestCase):
if is_in_ci():
models_to_test = [random.choice(VISION_MODELS)]
for model, template in models_to_test:
for model in models_to_test:
with self.subTest(model=model):
other_args = [
"--chat-template",
template,
"--mem-fraction-static",
"0.6",
"--load-format",

View File

@@ -688,7 +688,6 @@ class TestOpenAIServerIgnoreEOS(CustomTestCase):
cls.base_url,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
api_key=cls.api_key,
other_args=["--chat-template=llama_3_vision"],
)
cls.base_url += "/v1"
cls.tokenizer = get_tokenizer(DEFAULT_SMALL_MODEL_NAME_FOR_TEST)

View File

@@ -614,7 +614,7 @@ class TestInternVL2_5Server(TestOpenAIVisionServer):
cls.model,
cls.base_url,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
other_args=["--trust-remote-code", "--chat-template", "internvl-2-5"],
other_args=["--trust-remote-code"],
)
cls.base_url += "/v1"
@@ -676,8 +676,6 @@ class TestDeepseekVL2TinyServer(TestOpenAIVisionServer):
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
other_args=[
"--trust-remote-code",
"--chat-template",
"deepseek-vl2",
"--context-length",
"4096",
],
@@ -775,8 +773,6 @@ class TestKimiVLServer(TestOpenAIVisionServer):
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
other_args=[
"--trust-remote-code",
"--chat-template",
"kimi-vl",
"--context-length",
"4096",
"--dtype",