Support dynamic LoRA loading / unloading in engine/server API (#7446)

This commit is contained in:
Lifu Huang
2025-06-27 21:00:27 -07:00
committed by GitHub
parent cfe2edac38
commit 49538d111b
14 changed files with 949 additions and 31 deletions

View File

@@ -65,7 +65,7 @@ class LoRAAdapter(nn.Module):
self.layers: List[LoRALayer] = nn.ModuleList(
[
LoRALayer(config, base_hf_config)
for i in range(base_hf_config.num_hidden_layers)
for _ in range(base_hf_config.num_hidden_layers)
]
)
@@ -88,10 +88,9 @@ class LoRAAdapter(nn.Module):
else:
self.weights[name] = loaded_weight.cpu()
# stack kv_proj and gate_up_proj
for i in range(self.base_hf_config.num_hidden_layers):
layer = self.layers[i]
weight_names = [name for name, _ in layer.weights.items()]
# normalize kv_proj and gate_up_proj
for layer in self.layers:
weight_names = list(layer.weights.keys())
self.normalize_qkv_proj(weight_names, layer.weights)
self.normalize_gate_up_proj(weight_names, layer.weights)