[1/2] Refactor LoRA to support backend-specific batch preprocessing. (#10251)

This commit is contained in:
Lifu Huang
2025-09-10 09:58:37 -07:00
committed by GitHub
parent cda7e47ce7
commit 941002945b
6 changed files with 227 additions and 130 deletions

View File

@@ -28,6 +28,9 @@ from torch import nn
from sglang.srt.configs.load_config import LoadConfig
from sglang.srt.hf_transformers_utils import AutoConfig
from sglang.srt.lora.backend.base_backend import BaseLoRABackend
# from sglang.srt.lora.backend.chunked_backend import ChunkedSgmvLoRABackend
from sglang.srt.lora.backend.triton_backend import TritonLoRABackend
from sglang.srt.lora.lora_config import LoRAConfig
from sglang.srt.model_loader.loader import DefaultModelLoader
@@ -156,7 +159,7 @@ class LoRAAdapter(nn.Module):
gate_up_name = weight_name.replace("gate_proj", "gate_up_proj")
if up_name not in weights:
weights[up_name] = torch.zeros_like(weights[weight_name])
-assert self.lora_backend.name == "triton", (
+assert isinstance(self.lora_backend, TritonLoRABackend), (
f"LoRA weight initialization currently only supported for 'triton' backend. "
f"Received backend: {self.lora_backend.name}. Please verify your backend configuration "
f"or consider implementing custom initialization logic for other backends."