Support radix cache for LoRA feature (#7216)

This commit is contained in:
Baizhou Zhang
2025-08-11 10:14:11 -07:00
committed by GitHub
parent 6f81a710f7
commit 75e6a7cde1
12 changed files with 546 additions and 27 deletions

View File

@@ -2004,11 +2004,7 @@ class ServerArgs:
), "chunked_prefill_size must be divisible by page_size"
def check_lora_server_args(self):
assert (
self.max_loras_per_batch > 0
# FIXME
and (self.lora_paths is None or self.disable_radix_cache)
), "compatibility of lora and radix attention is in progress"
assert self.max_loras_per_batch > 0, "max_loras_per_batch must be positive"
# Enable LoRA if any LoRA paths are provided for backward compatibility.
if self.lora_paths: