[feat] Enable chunked prefill for llava-onevision (#2281)

This commit is contained in:
Ying Sheng
2024-12-02 20:19:02 -08:00
committed by GitHub
parent 69e2d4fb66
commit 480e38a733
5 changed files with 221 additions and 18 deletions

View File

@@ -39,6 +39,7 @@ suites = {
"test_triton_attention_kernels.py",
"test_triton_attention_backend.py",
"test_update_weights_from_disk.py",
"test_vision_chunked_prefill.py",
"test_vision_openai_server.py",
"test_session_control.py",
],