Support radix cache for LoRA feature (#7216)

This commit is contained in:
Baizhou Zhang
2025-08-11 10:14:11 -07:00
committed by GitHub
parent 6f81a710f7
commit 75e6a7cde1
12 changed files with 546 additions and 27 deletions

View File

@@ -2004,11 +2004,7 @@ class ServerArgs:
), "chunked_prefill_size must be divisible by page_size"
def check_lora_server_args(self):
assert (
self.max_loras_per_batch > 0
# FIXME
and (self.lora_paths is None or self.disable_radix_cache)
), "compatibility of lora and radix attention is in progress"
assert self.max_loras_per_batch > 0, "max_loras_per_batch must be positive"
# Enable LoRA if any LoRA paths are provided for backward compatibility.
if self.lora_paths: