fix: fix video input for qwen3-vl (#11361)

2025-10-10 19:35:35 +08:00
parent 4299aebdbb
commit a1a20b4c7c
4 changed files with 45 additions and 3 deletions
--- a/test/srt/test_vision_openai_server_a.py
+++ b/test/srt/test_vision_openai_server_a.py
@@ -50,6 +50,27 @@ class TestQwen2VLServer(ImageOpenAITestMixin, VideoOpenAITestMixin):
        cls.base_url += "/v1"


+class TestQwen3VLServer(ImageOpenAITestMixin, VideoOpenAITestMixin):
+    @classmethod
+    def setUpClass(cls):
+        cls.model = "Qwen/Qwen3-VL-30B-A3B-Instruct"
+        cls.base_url = DEFAULT_URL_FOR_TEST
+        cls.api_key = "sk-123456"
+        cls.process = popen_launch_server(
+            cls.model,
+            cls.base_url,
+            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
+            api_key=cls.api_key,
+            other_args=[
+                "--mem-fraction-static",
+                "0.80",
+                "--cuda-graph-max-bs",
+                "4",
+            ],
+        )
+        cls.base_url += "/v1"
+
+
 class TestQwen2_5_VLServer(ImageOpenAITestMixin, VideoOpenAITestMixin):
    @classmethod
    def setUpClass(cls):
--- a/test/srt/test_vision_openai_server_common.py
+++ b/test/srt/test_vision_openai_server_common.py
@@ -494,7 +494,7 @@ class VideoOpenAITestMixin(TestOpenAIOmniServerBase):
            **(self.get_vision_request_kwargs()),
        )

-        video_response = response.choices[0].message.content
+        video_response = response.choices[0].message.content.lower()

        print("-" * 30)
        print(f"Video response:\n{video_response}")
@@ -502,9 +502,10 @@ class VideoOpenAITestMixin(TestOpenAIOmniServerBase):

        # Add assertions to validate the video response
        assert (
-            "iPod" in video_response
+            "ipod" in video_response
            or "device" in video_response
            or "microphone" in video_response
+            or "phone" in video_response
        ), f"video_response: {video_response}, should contain 'iPod' or 'device'"
        assert (
            "man" in video_response