[1/2] Refactor LoRA to support backend-specific batch preprocessing. (#10251)

2025-09-10 09:58:37 -07:00
parent cda7e47ce7
commit 941002945b
6 changed files with 227 additions and 130 deletions
--- a/python/sglang/srt/lora/lora.py
+++ b/python/sglang/srt/lora/lora.py
@@ -28,6 +28,9 @@ from torch import nn
 from sglang.srt.configs.load_config import LoadConfig
 from sglang.srt.hf_transformers_utils import AutoConfig
 from sglang.srt.lora.backend.base_backend import BaseLoRABackend
+
+# from sglang.srt.lora.backend.chunked_backend import ChunkedSgmvLoRABackend
+from sglang.srt.lora.backend.triton_backend import TritonLoRABackend
 from sglang.srt.lora.lora_config import LoRAConfig
 from sglang.srt.model_loader.loader import DefaultModelLoader

@@ -156,7 +159,7 @@ class LoRAAdapter(nn.Module):
                gate_up_name = weight_name.replace("gate_proj", "gate_up_proj")
                if up_name not in weights:
                    weights[up_name] = torch.zeros_like(weights[weight_name])
-                    assert self.lora_backend.name == "triton", (
+                    assert isinstance(self.lora_backend, TritonLoRABackend), (
                        f"LoRA weight initialization currently only supported for 'triton' backend. "
                        f"Received backend: {self.lora_backend.name}. Please verify your backend configuration "
                        f"or consider implementing custom initialization logic for other backends."