Refactor LoRA handling to support adapter tensors in fused format (#6585)

This commit is contained in:
Lifu Huang
2025-05-26 21:51:54 -07:00
committed by GitHub
parent 1a8f5f6836
commit 477a101cbd
6 changed files with 86 additions and 31 deletions

View File

@@ -1473,7 +1473,7 @@ class ServerArgs:
self.max_loras_per_batch > 0
# FIXME
and (self.lora_paths is None or self.disable_radix_cache)
- ), "compatibility of lora and cuda graph and radix attention is in progress"
+ ), "compatibility of lora and radix attention is in progress"
assert self.base_gpu_id >= 0, "base_gpu_id must be non-negative"
assert self.gpu_id_step >= 1, "gpu_id_step must be positive"