Feat: support CUDA graph for LoRA (#4115)

Co-authored-by: Beichen Ma <mabeichen12@gmail.com>
This commit is contained in:
Qiaolin Yu
2025-04-29 02:30:44 -04:00
committed by GitHub
parent 2c3ea29476
commit 8c0cfca87d
13 changed files with 366 additions and 55 deletions

View File

@@ -23,7 +23,7 @@ from utils import (
DEFAULT_PROMPTS,
TORCH_DTYPES,
LoRAModelCase,
run_batch_lora_test,
run_lora_test_one_by_one,
)
from sglang.test.test_utils import CustomTestCase, is_in_ci
@@ -43,7 +43,7 @@ class TestLoRATP(CustomTestCase):
for tp_size in tp_list:
model_case.tp_size = tp_size
for torch_dtype in TORCH_DTYPES:
run_batch_lora_test(
run_lora_test_one_by_one(
prompts,
model_case,
torch_dtype,