[feat] Support different attention backends for prefill and decode (#6338)

Co-authored-by: tianqilin.99 <tianqilin.99@bytedance.com>
Co-authored-by: Baizhou Zhang <sobereddiezhang@gmail.com>
This commit is contained in:
Qiaolin Yu
2025-07-27 20:42:29 -07:00
committed by GitHub
parent fe6a445d1e
commit 2810338401
9 changed files with 350 additions and 29 deletions

View File

@@ -109,6 +109,7 @@ suites = {
TestFile("test_vision_openai_server_b.py", 620),
TestFile("test_w8a8_quantization.py", 46),
TestFile("test_reasoning_parser.py", 5),
TestFile("test_hybrid_attn_backend.py", 100),
],
"per-commit-amd": [
TestFile("models/lora/test_lora_backend.py", 99),