Refactor LoRA handling to support adapter tensors in fused format (#6585)
This commit is contained in:
@@ -1473,7 +1473,7 @@ class ServerArgs:
|
||||
self.max_loras_per_batch > 0
|
||||
# FIXME
|
||||
and (self.lora_paths is None or self.disable_radix_cache)
|
||||
), "compatibility of lora and cuda graph and radix attention is in progress"
|
||||
), "compatibility of lora and radix attention is in progress"
|
||||
assert self.base_gpu_id >= 0, "base_gpu_id must be non-negative"
|
||||
assert self.gpu_id_step >= 1, "gpu_id_step must be positive"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user