[feat] Enable chunked prefill for llava-onevision (#2412)

This commit is contained in:
Ying Sheng
2024-12-09 09:52:38 -08:00
committed by GitHub
parent 641b7d0ae0
commit 8586b72da0
5 changed files with 222 additions and 20 deletions

View File

@@ -39,6 +39,7 @@ suites = {
"test_triton_attention_kernels.py",
"test_triton_attention_backend.py",
"test_update_weights_from_disk.py",
"test_vision_chunked_prefill.py",
"test_vision_openai_server.py",
"test_session_control.py",
],