[2/2] Introduce Chunked-SGMV kernels and corresponding LoRA backend for improved performance (#10286)

2025-09-15 16:04:03 -07:00
parent 2689f0bf02
commit 3f41b48c40
10 changed files with 1499 additions and 13 deletions
--- a/test/srt/run_suite.py
+++ b/test/srt/run_suite.py
@@ -24,6 +24,7 @@ suites = {
        TestFile("lora/test_lora_update.py", 400),
        TestFile("lora/test_lora_qwen3.py", 97),
        TestFile("lora/test_lora_radix_cache.py", 100),
+        TestFile("lora/test_chunked_sgmv_backend.py", 30),
        TestFile("models/test_embedding_models.py", 73),
        # TestFile("models/test_clip_models.py", 52),
        TestFile("models/test_encoder_embedding_models.py", 100),