[2/2] Introduce Chunked-SGMV kernels and corresponding LoRA backend for improved performance (#10286)

This commit is contained in:
Lifu Huang
2025-09-15 16:04:03 -07:00
committed by GitHub
parent 2689f0bf02
commit 3f41b48c40
10 changed files with 1499 additions and 13 deletions

View File

@@ -24,6 +24,7 @@ suites = {
TestFile("lora/test_lora_update.py", 400),
TestFile("lora/test_lora_qwen3.py", 97),
TestFile("lora/test_lora_radix_cache.py", 100),
TestFile("lora/test_chunked_sgmv_backend.py", 30),
TestFile("models/test_embedding_models.py", 73),
# TestFile("models/test_clip_models.py", 52),
TestFile("models/test_encoder_embedding_models.py", 100),