[feat] Support different attention backends for prefill and decode (#6338)

Co-authored-by: tianqilin.99 <tianqilin.99@bytedance.com>
Co-authored-by: Baizhou Zhang <sobereddiezhang@gmail.com>
2025-07-27 20:42:29 -07:00
parent fe6a445d1e
commit 2810338401
9 changed files with 350 additions and 29 deletions
--- a/test/srt/run_suite.py
+++ b/test/srt/run_suite.py
@@ -109,6 +109,7 @@ suites = {
        TestFile("test_vision_openai_server_b.py", 620),
        TestFile("test_w8a8_quantization.py", 46),
        TestFile("test_reasoning_parser.py", 5),
+        TestFile("test_hybrid_attn_backend.py", 100),
    ],
    "per-commit-amd": [
        TestFile("models/lora/test_lora_backend.py", 99),