diff --git a/python/sglang/bench_one_batch_server.py b/python/sglang/bench_one_batch_server.py index adcad2ec4..920286e33 100644 --- a/python/sglang/bench_one_batch_server.py +++ b/python/sglang/bench_one_batch_server.py @@ -137,12 +137,10 @@ def generate_markdown_report(trace_dir, results: List["BenchmarkResult"]) -> str # all results should share the same isl & osl for result in results: - base_url = os.getenv( - "TRACE_BASE_URL", "https://github.com/sgl-project/ci-data/traces" - ).rstrip("/") + base_url = os.getenv("TRACE_BASE_URL", "").rstrip("/") relay_base = os.getenv( "PERFETTO_RELAY_URL", - "https://docs.sglang.ai/ci-data/pages/perfetto_relay.html", + "", ).rstrip("/") summary += result.to_markdown_row(trace_dir, base_url, relay_base) diff --git a/python/sglang/srt/models/mllama.py b/python/sglang/srt/models/mllama.py index fa294ddcd..8f89c32f1 100644 --- a/python/sglang/srt/models/mllama.py +++ b/python/sglang/srt/models/mllama.py @@ -901,7 +901,7 @@ class MllamaForConditionalGeneration(nn.Module): img = pixel_values[0, j] num_tiles = img.shape[0] batched_images[i, j, :num_tiles] = img - batched_ar_ids[i, j] = mm_input.mm_items[0].aspect_ratio_id[0, j] + batched_ar_ids[i, j] = mm_input.mm_items[0].aspect_ratio_ids[0, j] batched_ar_mask[i, j, :num_tiles] = mm_input.mm_items[ 0 diff --git a/test/srt/run_suite.py b/test/srt/run_suite.py index 2ee87b21f..7d1a12eef 100644 --- a/test/srt/run_suite.py +++ b/test/srt/run_suite.py @@ -131,8 +131,7 @@ suites = { TestFile("test_triton_sliding_window.py", 250), TestFile("test_utils_update_weights.py", 48), TestFile("test_vision_chunked_prefill.py", 175), - TestFile("test_vision_openai_server_a.py", 724), - TestFile("test_vision_openai_server_b.py", 446), + TestFile("test_vision_openai_server_a.py", 608), TestFile("test_vlm_input_format.py", 300), ], "per-commit-2-gpu": [ diff --git a/test/srt/test_vision_openai_server_a.py b/test/srt/test_vision_openai_server_a.py index b8f4c64c4..021bce3cf 100644 --- a/test/srt/test_vision_openai_server_a.py +++ b/test/srt/test_vision_openai_server_a.py @@ -8,91 +8,34 @@ import unittest from test_vision_openai_server_common import * -from sglang.test.test_utils import ( - DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - popen_launch_server, -) + +class TestLlavaServer(ImageOpenAITestMixin): + model = "lmms-lab/llava-onevision-qwen2-0.5b-ov" -class TestLlava(ImageOpenAITestMixin): - @classmethod - def setUpClass(cls): - cls.model = "lmms-lab/llava-onevision-qwen2-0.5b-ov" - cls.base_url = DEFAULT_URL_FOR_TEST - cls.api_key = "sk-123456" - cls.process = popen_launch_server( - cls.model, - cls.base_url, - timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - api_key=cls.api_key, - ) - cls.base_url += "/v1" - - -class TestQwen2VLServer(ImageOpenAITestMixin, VideoOpenAITestMixin): - @classmethod - def setUpClass(cls): - cls.model = "Qwen/Qwen2-VL-7B-Instruct" - cls.base_url = DEFAULT_URL_FOR_TEST - cls.api_key = "sk-123456" - cls.process = popen_launch_server( - cls.model, - cls.base_url, - timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - api_key=cls.api_key, - other_args=[ - "--mem-fraction-static", - "0.35", - "--cuda-graph-max-bs", - "4", - ], - ) - cls.base_url += "/v1" +class TestQwen25VLServer(ImageOpenAITestMixin, VideoOpenAITestMixin): + model = "Qwen/Qwen2.5-VL-7B-Instruct" + extra_args = [ + "--cuda-graph-max-bs=4", + ] class TestQwen3VLServer(ImageOpenAITestMixin, VideoOpenAITestMixin): - @classmethod - def setUpClass(cls): - cls.model = "Qwen/Qwen3-VL-30B-A3B-Instruct" - cls.base_url = DEFAULT_URL_FOR_TEST - cls.api_key = "sk-123456" - cls.process = popen_launch_server( - cls.model, - cls.base_url, - timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - api_key=cls.api_key, - other_args=[ - "--mem-fraction-static", - "0.80", - "--cuda-graph-max-bs", - "4", - ], - ) - cls.base_url += "/v1" + model = "Qwen/Qwen3-VL-30B-A3B-Instruct" + extra_args = ["--cuda-graph-max-bs=4"] -class TestQwen2_5_VLServer(ImageOpenAITestMixin, VideoOpenAITestMixin): - @classmethod - def setUpClass(cls): - cls.model = "Qwen/Qwen2.5-VL-7B-Instruct" - cls.base_url = DEFAULT_URL_FOR_TEST - cls.api_key = "sk-123456" - cls.process = popen_launch_server( - cls.model, - cls.base_url, - timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - api_key=cls.api_key, - other_args=[ - "--mem-fraction-static", - "0.35", - "--cuda-graph-max-bs", - "4", - ], - ) - cls.base_url += "/v1" +class TestQwen3OmniServer(OmniOpenAITestMixin): + model = "Qwen/Qwen3-Omni-30B-A3B-Instruct" + extra_args = [ # workaround to fit into H100 + "--mem-fraction-static=0.90", + "--disable-cuda-graph", + "--disable-fast-image-processor", + "--grammar-backend=none", + ] -class TestVLMContextLengthIssue(CustomTestCase): +class TestQwen2VLContextLengthServer(CustomTestCase): @classmethod def setUpClass(cls): cls.model = "Qwen/Qwen2-VL-7B-Instruct" @@ -106,7 +49,6 @@ class TestVLMContextLengthIssue(CustomTestCase): other_args=[ "--context-length", "300", - "--mem-fraction-static=0.75", "--cuda-graph-max-bs", "4", ], @@ -149,210 +91,62 @@ class TestVLMContextLengthIssue(CustomTestCase): ) -# Note(Xinyuan): mllama is not stable for now, skip for CI -# class TestMllamaServer(TestOpenAIVisionServer): -# @classmethod -# def setUpClass(cls): -# cls.model = "meta-llama/Llama-3.2-11B-Vision-Instruct" -# cls.base_url = DEFAULT_URL_FOR_TEST -# cls.api_key = "sk-123456" -# cls.process = popen_launch_server( -# cls.model, -# cls.base_url, -# timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, -# api_key=cls.api_key, -# ) -# cls.base_url += "/v1" +# flaky +# class TestMllamaServer(ImageOpenAITestMixin): +# model = "meta-llama/Llama-3.2-11B-Vision-Instruct" -class TestMinicpmvServer(ImageOpenAITestMixin): - @classmethod - def setUpClass(cls): - cls.model = "openbmb/MiniCPM-V-2_6" - cls.base_url = DEFAULT_URL_FOR_TEST - cls.api_key = "sk-123456" - cls.process = popen_launch_server( - cls.model, - cls.base_url, - timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - other_args=[ - "--trust-remote-code", - "--mem-fraction-static", - "0.35", - "--cuda-graph-max-bs", - "4", - ], - ) - cls.base_url += "/v1" +class TestInternVL25Server(ImageOpenAITestMixin): + model = "OpenGVLab/InternVL2_5-2B" + extra_args = [ + "--cuda-graph-max-bs=4", + ] -class TestMinicpmv4Server(ImageOpenAITestMixin): - @classmethod - def setUpClass(cls): - cls.model = "openbmb/MiniCPM-V-4" - cls.base_url = DEFAULT_URL_FOR_TEST - cls.api_key = "sk-123456" - cls.process = popen_launch_server( - cls.model, - cls.base_url, - timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - other_args=[ - "--trust-remote-code", - "--mem-fraction-static", - "0.35", - "--cuda-graph-max-bs", - "4", - ], - ) - cls.base_url += "/v1" +class TestMiniCPMV4Server(ImageOpenAITestMixin): + model = "openbmb/MiniCPM-V-4" + extra_args = [ + "--cuda-graph-max-bs=4", + ] -class TestInternVL2_5Server(ImageOpenAITestMixin): - @classmethod - def setUpClass(cls): - cls.model = "OpenGVLab/InternVL2_5-2B" - cls.base_url = DEFAULT_URL_FOR_TEST - cls.api_key = "sk-123456" - cls.process = popen_launch_server( - cls.model, - cls.base_url, - timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - other_args=[ - "--trust-remote-code", - "--cuda-graph-max-bs", - "4", - ], - ) - cls.base_url += "/v1" +class TestMiniCPMo26Server(ImageOpenAITestMixin, AudioOpenAITestMixin): + model = "openbmb/MiniCPM-o-2_6" + extra_args = [ + "--cuda-graph-max-bs=4", + ] -class TestMinicpmo2_6Server(ImageOpenAITestMixin, AudioOpenAITestMixin): - @classmethod - def setUpClass(cls): - cls.model = "openbmb/MiniCPM-o-2_6" - cls.base_url = DEFAULT_URL_FOR_TEST - cls.api_key = "sk-123456" - cls.process = popen_launch_server( - cls.model, - cls.base_url, - timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - other_args=[ - "--trust-remote-code", - "--mem-fraction-static", - "0.65", - "--cuda-graph-max-bs", - "4", - ], - ) - cls.base_url += "/v1" +class TestGemma3itServer(ImageOpenAITestMixin): + model = "google/gemma-3-4b-it" + extra_args = [ + "--cuda-graph-max-bs=4", + ] -class TestMimoVLServer(ImageOpenAITestMixin): - @classmethod - def setUpClass(cls): - cls.model = "XiaomiMiMo/MiMo-VL-7B-RL" - cls.base_url = DEFAULT_URL_FOR_TEST - cls.api_key = "sk-123456" - cls.process = popen_launch_server( - cls.model, - cls.base_url, - timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - api_key=cls.api_key, - other_args=[ - "--trust-remote-code", - "--mem-fraction-static", - "0.6", - "--cuda-graph-max-bs", - "4", - ], - ) - cls.base_url += "/v1" +class TestKimiVLServer(ImageOpenAITestMixin): + model = "moonshotai/Kimi-VL-A3B-Instruct" + extra_args = [ + "--context-length=8192", + "--dtype=bfloat16", + ] - -class TestVILAServer(ImageOpenAITestMixin): - @classmethod - def setUpClass(cls): - cls.model = "Efficient-Large-Model/NVILA-Lite-2B-hf-0626" - cls.base_url = DEFAULT_URL_FOR_TEST - cls.api_key = "sk-123456" - cls.revision = "6bde1de5964b40e61c802b375fff419edc867506" - cls.process = popen_launch_server( - cls.model, - cls.base_url, - timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - api_key=cls.api_key, - other_args=[ - "--trust-remote-code", - "--context-length=65536", - f"--revision={cls.revision}", - "--cuda-graph-max-bs", - "4", - ], - ) - cls.base_url += "/v1" - - -class TestPhi4MMServer(ImageOpenAITestMixin, AudioOpenAITestMixin): - @classmethod - def setUpClass(cls): - # Manually download LoRA adapter_config.json as it's not downloaded by the model loader by default. - from huggingface_hub import constants, snapshot_download - - snapshot_download( - "microsoft/Phi-4-multimodal-instruct", - allow_patterns=["**/adapter_config.json"], - ) - - cls.model = "microsoft/Phi-4-multimodal-instruct" - cls.base_url = DEFAULT_URL_FOR_TEST - cls.api_key = "sk-123456" - - revision = "33e62acdd07cd7d6635badd529aa0a3467bb9c6a" - cls.process = popen_launch_server( - cls.model, - cls.base_url, - timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - other_args=[ - "--trust-remote-code", - "--mem-fraction-static", - "0.70", - "--disable-radix-cache", - "--max-loras-per-batch", - "2", - "--revision", - revision, - "--lora-paths", - f"vision={constants.HF_HUB_CACHE}/models--microsoft--Phi-4-multimodal-instruct/snapshots/{revision}/vision-lora", - f"speech={constants.HF_HUB_CACHE}/models--microsoft--Phi-4-multimodal-instruct/snapshots/{revision}/speech-lora", - "--cuda-graph-max-bs", - "4", - ], - ) - cls.base_url += "/v1" - - def get_vision_request_kwargs(self): - return { - "extra_body": { - "lora_path": "vision", - "top_k": 1, - "top_p": 1.0, - } - } - - def get_audio_request_kwargs(self): - return { - "extra_body": { - "lora_path": "speech", - "top_k": 1, - "top_p": 1.0, - } - } - - # This _test_audio_ambient_completion test is way too complicated to pass for a small LLM - def test_audio_ambient_completion(self): + def test_video_images_chat_completion(self): + # model context length exceeded pass +class TestGLM41VServer(ImageOpenAITestMixin, VideoOpenAITestMixin): + model = "zai-org/GLM-4.1V-9B-Thinking" + extra_args = [ + "--reasoning-parser=glm45", + ] + + +class TestQwen2AudioServer(AudioOpenAITestMixin): + model = "Qwen/Qwen2-Audio-7B-Instruct" + + if __name__ == "__main__": del ( TestOpenAIMLLMServerBase, diff --git a/test/srt/test_vision_openai_server_b.py b/test/srt/test_vision_openai_server_b.py deleted file mode 100644 index 304896e73..000000000 --- a/test/srt/test_vision_openai_server_b.py +++ /dev/null @@ -1,275 +0,0 @@ -import unittest - -from test_vision_openai_server_common import * - -from sglang.test.test_utils import ( - DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - popen_launch_server, -) - - -class TestPixtralServer(ImageOpenAITestMixin): - @classmethod - def setUpClass(cls): - cls.model = "mistral-community/pixtral-12b" - cls.base_url = DEFAULT_URL_FOR_TEST - cls.api_key = "sk-123456" - cls.process = popen_launch_server( - cls.model, - cls.base_url, - timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - other_args=[ - "--trust-remote-code", - "--mem-fraction-static", - "0.70", - "--cuda-graph-max-bs", - "4", - ], - ) - cls.base_url += "/v1" - - -class TestMistral3_1Server(ImageOpenAITestMixin): - @classmethod - def setUpClass(cls): - cls.model = "unsloth/Mistral-Small-3.1-24B-Instruct-2503" - cls.base_url = DEFAULT_URL_FOR_TEST - cls.api_key = "sk-123456" - cls.process = popen_launch_server( - cls.model, - cls.base_url, - timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - other_args=[ - "--trust-remote-code", - "--mem-fraction-static", - "0.75", - "--cuda-graph-max-bs", - "4", - ], - ) - cls.base_url += "/v1" - - -class TestDeepseekVL2Server(ImageOpenAITestMixin): - @classmethod - def setUpClass(cls): - cls.model = "deepseek-ai/deepseek-vl2-small" - cls.base_url = DEFAULT_URL_FOR_TEST - cls.api_key = "sk-123456" - cls.process = popen_launch_server( - cls.model, - cls.base_url, - timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - other_args=[ - "--trust-remote-code", - "--context-length", - "4096", - "--cuda-graph-max-bs", - "4", - ], - ) - cls.base_url += "/v1" - - -class TestJanusProServer(ImageOpenAITestMixin): - @classmethod - def setUpClass(cls): - cls.model = "deepseek-ai/Janus-Pro-7B" - cls.base_url = DEFAULT_URL_FOR_TEST - cls.api_key = "sk-123456" - cls.process = popen_launch_server( - cls.model, - cls.base_url, - timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - other_args=[ - "--trust-remote-code", - "--mem-fraction-static", - "0.35", - "--cuda-graph-max-bs", - "4", - ], - ) - cls.base_url += "/v1" - - def test_video_images_chat_completion(self): - pass - - -## Skip for ci test -# class TestLlama4Server(TestOpenAIVisionServer): -# @classmethod -# def setUpClass(cls): -# cls.model = "meta-llama/Llama-4-Scout-17B-16E-Instruct" -# cls.base_url = DEFAULT_URL_FOR_TEST -# cls.api_key = "sk-123456" -# cls.process = popen_launch_server( -# cls.model, -# cls.base_url, -# timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, -# other_args=[ -# "--chat-template", -# "llama-4", -# "--mem-fraction-static", -# "0.8", -# "--tp-size=8", -# "--context-length=8192", -# "--mm-attention-backend", -# "fa3", -# "--cuda-graph-max-bs", -# "4", -# ], -# ) -# cls.base_url += "/v1" - - -class TestGemma3itServer(ImageOpenAITestMixin): - @classmethod - def setUpClass(cls): - cls.model = "google/gemma-3-4b-it" - cls.base_url = DEFAULT_URL_FOR_TEST - cls.api_key = "sk-123456" - cls.process = popen_launch_server( - cls.model, - cls.base_url, - timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - other_args=[ - "--trust-remote-code", - "--mem-fraction-static", - "0.70", - "--enable-multimodal", - "--cuda-graph-max-bs", - "4", - ], - ) - cls.base_url += "/v1" - - -class TestGemma3nServer(ImageOpenAITestMixin, AudioOpenAITestMixin): - @classmethod - def setUpClass(cls): - cls.model = "google/gemma-3n-E4B-it" - cls.base_url = DEFAULT_URL_FOR_TEST - cls.api_key = "sk-123456" - cls.process = popen_launch_server( - cls.model, - cls.base_url, - timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - other_args=[ - "--trust-remote-code", - "--mem-fraction-static", - "0.70", - "--cuda-graph-max-bs", - "4", - ], - ) - cls.base_url += "/v1" - - # This _test_audio_ambient_completion test is way too complicated to pass for a small LLM - def test_audio_ambient_completion(self): - pass - - def _test_mixed_image_audio_chat_completion(self): - self._test_mixed_image_audio_chat_completion() - - -class TestQwen2AudioServer(AudioOpenAITestMixin): - @classmethod - def setUpClass(cls): - cls.model = "Qwen/Qwen2-Audio-7B-Instruct" - cls.base_url = DEFAULT_URL_FOR_TEST - cls.api_key = "sk-123456" - cls.process = popen_launch_server( - cls.model, - cls.base_url, - timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - other_args=[ - "--trust-remote-code", - "--mem-fraction-static", - "0.70", - ], - ) - cls.base_url += "/v1" - - -# Temporarily skip Kimi-VL for CI test due to issue in transformers=4.57.0 -# class TestKimiVLServer(ImageOpenAITestMixin): -# @classmethod -# def setUpClass(cls): -# cls.model = "moonshotai/Kimi-VL-A3B-Instruct" -# cls.base_url = DEFAULT_URL_FOR_TEST -# cls.api_key = "sk-123456" -# cls.process = popen_launch_server( -# cls.model, -# cls.base_url, -# timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, -# other_args=[ -# "--trust-remote-code", -# "--context-length", -# "4096", -# "--dtype", -# "bfloat16", -# "--cuda-graph-max-bs", -# "4", -# ], -# ) -# cls.base_url += "/v1" - -# def test_video_images_chat_completion(self): -# pass - - -class TestGLM41VServer(ImageOpenAITestMixin, VideoOpenAITestMixin): - @classmethod - def setUpClass(cls): - cls.model = "zai-org/GLM-4.1V-9B-Thinking" - cls.base_url = DEFAULT_URL_FOR_TEST - cls.api_key = "sk-123456" - cls.process = popen_launch_server( - cls.model, - cls.base_url, - timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - other_args=[ - "--trust-remote-code", - "--mem-fraction-static", - "0.68", - "--cuda-graph-max-bs", - "4", - "--reasoning-parser", - "glm45", - ], - ) - cls.base_url += "/v1" - - -class TestQwen3OmniServer(OmniOpenAITestMixin): - @classmethod - def setUpClass(cls): - cls.model = "Qwen/Qwen3-Omni-30B-A3B-Instruct" - cls.base_url = DEFAULT_URL_FOR_TEST - cls.api_key = "sk-123456" - cls.process = popen_launch_server( - cls.model, - cls.base_url, - timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - other_args=[ # workaround to fit into H100 - "--trust-remote-code", - "--mem-fraction-static", - "0.90", - "--disable-cuda-graph", - "--disable-fast-image-processor", - "--grammar-backend", - "none", - ], - ) - cls.base_url += "/v1" - - -if __name__ == "__main__": - del ( - TestOpenAIMLLMServerBase, - ImageOpenAITestMixin, - VideoOpenAITestMixin, - AudioOpenAITestMixin, - OmniOpenAITestMixin, - ) - unittest.main() diff --git a/test/srt/test_vision_openai_server_common.py b/test/srt/test_vision_openai_server_common.py index ec8a5fce3..66f0c0d7c 100644 --- a/test/srt/test_vision_openai_server_common.py +++ b/test/srt/test_vision_openai_server_common.py @@ -9,7 +9,12 @@ import requests from PIL import Image from sglang.srt.utils import kill_process_tree -from sglang.test.test_utils import DEFAULT_URL_FOR_TEST, CustomTestCase +from sglang.test.test_utils import ( + DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + DEFAULT_URL_FOR_TEST, + CustomTestCase, + popen_launch_server, +) # image IMAGE_MAN_IRONING_URL = "https://raw.githubusercontent.com/sgl-project/sgl-test-files/refs/heads/main/images/man_ironing_on_back_of_suv.png" @@ -24,12 +29,21 @@ AUDIO_BIRD_SONG_URL = "https://raw.githubusercontent.com/sgl-project/sgl-test-fi class TestOpenAIMLLMServerBase(CustomTestCase): + model: str + extra_args: list = [] + fixed_args: list = ["--trust-remote-code", "--enable-multimodal"] + @classmethod def setUpClass(cls): - cls.model = "" cls.base_url = DEFAULT_URL_FOR_TEST cls.api_key = "sk-123456" - cls.process = None + cls.process = popen_launch_server( + cls.model, + cls.base_url, + timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + api_key=cls.api_key, + other_args=cls.extra_args + cls.fixed_args, + ) cls.base_url += "/v1" @classmethod @@ -421,7 +435,7 @@ class ImageOpenAITestMixin(TestOpenAIMLLMServerBase): or "device" in video_response or "microphone" in video_response ), f""" - ====================== video_response ===================== + ====================== video_images response ===================== {video_response} =========================================================== should contain 'iPod' or 'device' or 'microphone' @@ -435,7 +449,7 @@ class ImageOpenAITestMixin(TestOpenAIMLLMServerBase): or "Steve" in video_response or "hand" in video_response ), f""" - ====================== video_response ===================== + ====================== video_images response ===================== {video_response} =========================================================== should contain 'man' or 'person' or 'individual' or 'speaker' or 'presenter' or 'Steve' or 'hand' @@ -446,7 +460,7 @@ class ImageOpenAITestMixin(TestOpenAIMLLMServerBase): or "display" in video_response or "hold" in video_response ), f""" - ====================== video_response ===================== + ====================== video_images response ===================== {video_response} =========================================================== should contain 'present' or 'examine' or 'display' or 'hold'