Support LoRA in TestOpenAIVisionServer and fix fused kv_proj loading bug. (#6861)
This commit is contained in:
@@ -165,14 +165,19 @@ class LoRAAdapter(nn.Module):
|
||||
self.base_hf_config.hidden_size
|
||||
// self.base_hf_config.num_attention_heads
|
||||
)
|
||||
weights[q_name], weights[kv_name] = torch.split(
|
||||
weights[q_name], k_proj_weight, v_proj_weight = torch.split(
|
||||
weights[qkv_name],
|
||||
[
|
||||
head_size * self.base_hf_config.num_attention_heads,
|
||||
head_size * self.base_hf_config.num_key_value_heads * 2,
|
||||
head_size * self.base_hf_config.num_key_value_heads,
|
||||
head_size * self.base_hf_config.num_key_value_heads,
|
||||
],
|
||||
dim=0,
|
||||
)
|
||||
weights[kv_name] = torch.stack(
|
||||
[k_proj_weight, v_proj_weight],
|
||||
dim=0,
|
||||
)
|
||||
|
||||
def normalize_gate_up_proj(
|
||||
self, weight_names: List[str], weights: Dict[str, torch.Tensor]
|
||||
|
||||
Reference in New Issue
Block a user