Model: Support Qwen 2.5 vl (#3258)

2025-02-16 16:58:53 +08:00
parent 39416e394a
commit bcc213df61
11 changed files with 2000 additions and 262 deletions
--- a/test/srt/test_vision_openai_server.py
+++ b/test/srt/test_vision_openai_server.py
@@ -252,6 +252,18 @@ class TestOpenAIVisionServer(unittest.TestCase):
        print("-" * 30)

        # Add assertions to validate the video response
+        assert "iPod" in video_response or "device" in video_response, video_response
+        assert (
+            "man" in video_response
+            or "person" in video_response
+            or "individual" in video_response
+        ), video_response
+        assert (
+            "present" in video_response
+            or "examine" in video_response
+            or "display" in video_response
+        )
+        assert "black" in video_response or "dark" in video_response
        self.assertIsNotNone(video_response)
        self.assertGreater(len(video_response), 0)

@@ -366,6 +378,30 @@ class TestQWen2VLServer(TestOpenAIVisionServer):
        cls.base_url += "/v1"


+class TestQWen2_5_VLServer(TestOpenAIVisionServer):
+    @classmethod
+    def setUpClass(cls):
+        cls.model = "Qwen/Qwen2.5-VL-7B-Instruct"
+        cls.base_url = DEFAULT_URL_FOR_TEST
+        cls.api_key = "sk-123456"
+        cls.process = popen_launch_server(
+            cls.model,
+            cls.base_url,
+            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
+            api_key=cls.api_key,
+            other_args=[
+                "--chat-template",
+                "qwen2-vl",
+                # FIXME: fixed chunk size works around chunked prefill in image embeds
+                "--chunked-prefill-size",
+                "10000",
+                "--mem-fraction-static",
+                "0.4",
+            ],
+        )
+        cls.base_url += "/v1"
+
+
 class TestQWen2VLServerContextLengthIssue(unittest.TestCase):
    @classmethod
    def setUpClass(cls):