Support radix cache for LoRA feature (#7216)
This commit is contained in:
@@ -2004,11 +2004,7 @@ class ServerArgs:
|
||||
), "chunked_prefill_size must be divisible by page_size"
|
||||
|
||||
def check_lora_server_args(self):
|
||||
assert (
|
||||
self.max_loras_per_batch > 0
|
||||
# FIXME
|
||||
and (self.lora_paths is None or self.disable_radix_cache)
|
||||
), "compatibility of lora and radix attention is in progress"
|
||||
assert self.max_loras_per_batch > 0, "max_loras_per_batch must be positive"
|
||||
|
||||
# Enable LoRA if any LoRA paths are provided for backward compatibility.
|
||||
if self.lora_paths:
|
||||
|
||||
Reference in New Issue
Block a user