Refactor LoRA handling to support adapter tensors in fused format (#6585)

2025-05-26 21:51:54 -07:00
parent 1a8f5f6836
commit 477a101cbd
6 changed files with 86 additions and 31 deletions
--- a/python/sglang/srt/server_args.py
+++ b/python/sglang/srt/server_args.py
@@ -1473,7 +1473,7 @@ class ServerArgs:
            self.max_loras_per_batch > 0
            # FIXME
            and (self.lora_paths is None or self.disable_radix_cache)
-        ), "compatibility of lora and cuda graph and radix attention is in progress"
+        ), "compatibility of lora and radix attention is in progress"
        assert self.base_gpu_id >= 0, "base_gpu_id must be non-negative"
        assert self.gpu_id_step >= 1, "gpu_id_step must be positive"