[VLM] Support chunk prefill for VLM (#6355)

Co-authored-by: yizhang2077 <1109276519@qq.com>
2025-05-22 20:32:41 -07:00
parent 0a4fc73b48
commit 4685fbb888
20 changed files with 510 additions and 184 deletions
--- a/test/srt/test_vision_openai_server_common.py
+++ b/test/srt/test_vision_openai_server_common.py
@@ -294,20 +294,24 @@ class TestOpenAIVisionServer(CustomTestCase):
        print("-" * 30)

        # Add assertions to validate the video response
-        assert "iPod" in video_response or "device" in video_response, video_response
+        assert (
+            "iPod" in video_response or "device" in video_response
+        ), f"video_response: {video_response}, should contain 'iPod' or 'device'"
        assert (
            "man" in video_response
            or "person" in video_response
            or "individual" in video_response
            or "speaker" in video_response
-        ), video_response
+        ), f"video_response: {video_response}, should either have 'man' in video_response, or 'person' in video_response, or 'individual' in video_response or 'speaker' in video_response"
        assert (
            "present" in video_response
            or "examine" in video_response
            or "display" in video_response
            or "hold" in video_response
-        )
-        assert "black" in video_response or "dark" in video_response
+        ), f"video_response: {video_response}, should contain 'present', 'examine', 'display', or 'hold'"
+        assert (
+            "black" in video_response or "dark" in video_response
+        ), f"video_response: {video_response}, should contain 'black' or 'dark'"
        self.assertIsNotNone(video_response)
        self.assertGreater(len(video_response), 0)