Feat: support cuda graph for LoRA (#4115)

Co-authored-by: Beichen Ma <mabeichen12@gmail.com>
This commit is contained in:
Qiaolin Yu
2025-04-29 02:30:44 -04:00
committed by GitHub
parent 2c3ea29476
commit 8c0cfca87d
13 changed files with 366 additions and 55 deletions

View File

@@ -80,6 +80,7 @@ suites = {
TestFile("test_vlm_accuracy.py", 60),
TestFile("test_vision_openai_server.py", 637),
TestFile("test_w8a8_quantization.py", 46),
TestFile("models/lora/test_lora_cuda_graph.py", 250),
],
"per-commit-2-gpu": [
TestFile("models/lora/test_lora_tp.py", 116),