Refactor VLM embedding routine to use precomputed features (#6543)

Signed-off-by: Xinyuan Tong <justinning0323@outlook.com>
2025-05-24 18:39:21 -07:00
parent 0d47788025
commit 681fdc264b
8 changed files with 285 additions and 203 deletions
--- a/test/srt/run_suite.py
+++ b/test/srt/run_suite.py
@@ -81,7 +81,7 @@ suites = {
        TestFile("test_update_weights_from_tensor.py", 48),
        TestFile("test_vertex_endpoint.py", 31),
        TestFile("test_vision_chunked_prefill.py", 175),
-        TestFile("test_vlm_accuracy.py", 60),
+        TestFile("test_vlm_input_format.py", 300),
        TestFile("test_vision_openai_server_a.py", 700),
        TestFile("test_vision_openai_server_b.py", 700),
        TestFile("test_w8a8_quantization.py", 46),