Fix and clean up the chat-template requirement for VLMs (#6114)
Signed-off-by: Xinyuan Tong <justinning0323@outlook.com>
This commit is contained in:
@@ -19,17 +19,12 @@ from sglang.test.test_utils import (
|
||||
|
||||
# VLM models for testing
|
||||
MODELS = [
|
||||
SimpleNamespace(
|
||||
model="google/gemma-3-27b-it", chat_template="gemma-it", mmmu_accuracy=0.45
|
||||
),
|
||||
SimpleNamespace(model="google/gemma-3-27b-it", mmmu_accuracy=0.45),
|
||||
SimpleNamespace(
|
||||
model="Qwen/Qwen2.5-VL-3B-Instruct",
|
||||
chat_template="qwen2-vl",
|
||||
mmmu_accuracy=0.4,
|
||||
),
|
||||
SimpleNamespace(
|
||||
model="openbmb/MiniCPM-V-2_6", chat_template="minicpmv", mmmu_accuracy=0.4
|
||||
),
|
||||
SimpleNamespace(model="openbmb/MiniCPM-V-2_6", mmmu_accuracy=0.4),
|
||||
]
|
||||
|
||||
|
||||
@@ -50,7 +45,6 @@ class TestVLMModels(CustomTestCase):
|
||||
def run_mmmu_eval(
|
||||
self,
|
||||
model_version: str,
|
||||
chat_template: str,
|
||||
output_path: str,
|
||||
*,
|
||||
env: dict | None = None,
|
||||
@@ -69,11 +63,7 @@ class TestVLMModels(CustomTestCase):
|
||||
os.makedirs(output_path, exist_ok=True)
|
||||
|
||||
# -------- compose --model_args --------
|
||||
model_args = (
|
||||
f'model_version="{model_version}",'
|
||||
f'chat_template="{chat_template}",'
|
||||
f"tp={tp}"
|
||||
)
|
||||
model_args = f'model_version="{model_version}",' f"tp={tp}"
|
||||
|
||||
# -------- build command list --------
|
||||
cmd = [
|
||||
@@ -122,8 +112,6 @@ class TestVLMModels(CustomTestCase):
|
||||
timeout=self.time_out,
|
||||
api_key=self.api_key,
|
||||
other_args=[
|
||||
"--chat-template",
|
||||
model.chat_template,
|
||||
"--trust-remote-code",
|
||||
"--cuda-graph-max-bs",
|
||||
"32",
|
||||
@@ -134,7 +122,7 @@ class TestVLMModels(CustomTestCase):
|
||||
)
|
||||
|
||||
# Run evaluation
|
||||
self.run_mmmu_eval(model.model, model.chat_template, "./logs")
|
||||
self.run_mmmu_eval(model.model, "./logs")
|
||||
|
||||
# Get the result file
|
||||
result_file_path = glob.glob("./logs/*.json")[0]
|
||||
|
||||
@@ -156,8 +156,6 @@ class TestBenchServing(CustomTestCase):
|
||||
num_prompts=200,
|
||||
request_rate=float("inf"),
|
||||
other_server_args=[
|
||||
"--chat-template",
|
||||
DEFAULT_VLM_CHAT_TEMPLATE_FOR_TEST,
|
||||
"--mem-fraction-static",
|
||||
"0.7",
|
||||
],
|
||||
@@ -181,8 +179,6 @@ class TestBenchServing(CustomTestCase):
|
||||
num_prompts=50,
|
||||
request_rate=1,
|
||||
other_server_args=[
|
||||
"--chat-template",
|
||||
DEFAULT_VLM_CHAT_TEMPLATE_FOR_TEST,
|
||||
"--mem-fraction-static",
|
||||
"0.7",
|
||||
],
|
||||
|
||||
@@ -29,10 +29,10 @@ from sglang.test.test_utils import (
|
||||
)
|
||||
|
||||
VISION_MODELS = [
|
||||
("unsloth/Qwen2.5-VL-7B-Instruct-bnb-4bit", "qwen2-vl"),
|
||||
("unsloth/Qwen2-VL-7B-Instruct-bnb-4bit", "qwen2-vl"),
|
||||
("unsloth/Llama-3.2-11B-Vision-Instruct-bnb-4bit", "llama_3_vision"),
|
||||
("unsloth/Llama-3.2-11B-Vision-bnb-4bit", "llama_3_vision"),
|
||||
"unsloth/Qwen2.5-VL-7B-Instruct-bnb-4bit",
|
||||
"unsloth/Qwen2-VL-7B-Instruct-bnb-4bit",
|
||||
"unsloth/Llama-3.2-11B-Vision-Instruct-bnb-4bit",
|
||||
"unsloth/Llama-3.2-11B-Vision-bnb-4bit",
|
||||
]
|
||||
LANGUAGE_MODELS = [
|
||||
"unsloth/Qwen2.5-7B-Instruct-bnb-4bit",
|
||||
@@ -249,11 +249,9 @@ class TestVisionModel(CustomTestCase):
|
||||
if is_in_ci():
|
||||
models_to_test = [random.choice(VISION_MODELS)]
|
||||
|
||||
for model, template in models_to_test:
|
||||
for model in models_to_test:
|
||||
with self.subTest(model=model):
|
||||
other_args = [
|
||||
"--chat-template",
|
||||
template,
|
||||
"--mem-fraction-static",
|
||||
"0.6",
|
||||
"--load-format",
|
||||
|
||||
@@ -688,7 +688,6 @@ class TestOpenAIServerIgnoreEOS(CustomTestCase):
|
||||
cls.base_url,
|
||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
api_key=cls.api_key,
|
||||
other_args=["--chat-template=llama_3_vision"],
|
||||
)
|
||||
cls.base_url += "/v1"
|
||||
cls.tokenizer = get_tokenizer(DEFAULT_SMALL_MODEL_NAME_FOR_TEST)
|
||||
|
||||
@@ -614,7 +614,7 @@ class TestInternVL2_5Server(TestOpenAIVisionServer):
|
||||
cls.model,
|
||||
cls.base_url,
|
||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
other_args=["--trust-remote-code", "--chat-template", "internvl-2-5"],
|
||||
other_args=["--trust-remote-code"],
|
||||
)
|
||||
cls.base_url += "/v1"
|
||||
|
||||
@@ -676,8 +676,6 @@ class TestDeepseekVL2TinyServer(TestOpenAIVisionServer):
|
||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
other_args=[
|
||||
"--trust-remote-code",
|
||||
"--chat-template",
|
||||
"deepseek-vl2",
|
||||
"--context-length",
|
||||
"4096",
|
||||
],
|
||||
@@ -775,8 +773,6 @@ class TestKimiVLServer(TestOpenAIVisionServer):
|
||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
other_args=[
|
||||
"--trust-remote-code",
|
||||
"--chat-template",
|
||||
"kimi-vl",
|
||||
"--context-length",
|
||||
"4096",
|
||||
"--dtype",
|
||||
|
||||
Reference in New Issue
Block a user