Support dynamic LoRA loading / unloading in engine/server API (#7446)

2025-06-27 21:00:27 -07:00
parent cfe2edac38
commit 49538d111b
14 changed files with 949 additions and 31 deletions
--- a/python/sglang/srt/lora/lora.py
+++ b/python/sglang/srt/lora/lora.py
@@ -65,7 +65,7 @@ class LoRAAdapter(nn.Module):
        self.layers: List[LoRALayer] = nn.ModuleList(
            [
                LoRALayer(config, base_hf_config)
-                for i in range(base_hf_config.num_hidden_layers)
+                for _ in range(base_hf_config.num_hidden_layers)
            ]
        )

@@ -88,10 +88,9 @@ class LoRAAdapter(nn.Module):
            else:
                self.weights[name] = loaded_weight.cpu()

-        # stack kv_proj and gate_up_proj
-        for i in range(self.base_hf_config.num_hidden_layers):
-            layer = self.layers[i]
-            weight_names = [name for name, _ in layer.weights.items()]
+        # normalize kv_proj and gate_up_proj
+        for layer in self.layers:
+            weight_names = list(layer.weights.keys())
            self.normalize_qkv_proj(weight_names, layer.weights)
            self.normalize_gate_up_proj(weight_names, layer.weights)