Feat: support cuda graph for LoRA (#4115)

Co-authored-by: Beichen Ma <mabeichen12@gmail.com>
This commit is contained in:
Qiaolin Yu
2025-04-29 02:30:44 -04:00
committed by GitHub
parent 2c3ea29476
commit 8c0cfca87d
13 changed files with 366 additions and 55 deletions

View File

@@ -1242,7 +1242,6 @@ class ServerArgs:
assert (
self.max_loras_per_batch > 0
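# FIXME: temporary restriction; lift it once LoRA is compatible with the features named in the assertion message below.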
and (self.lora_paths is None or self.disable_cuda_graph)
and (self.lora_paths is None or self.disable_radix_cache)
), "compatibility of lora and cuda graph and radix attention is in progress"
assert self.base_gpu_id >= 0, "base_gpu_id must be non-negative"