Fix deepseek awq v3 (#3450)

This commit is contained in:
Liangsheng Yin
2025-02-12 22:09:52 +08:00
committed by GitHub
parent 8adbc78b30
commit 8616357a97
4 changed files with 69 additions and 10 deletions

View File

@@ -255,6 +255,8 @@ class DeepseekV2Attention(nn.Module):
self.kv_lora_rank + self.qk_rope_head_dim,
bias=False,
quant_config=quant_config,
# FIXME: quick fix to skip quantization; the prefix below is hard-coded (the f-string has no placeholders)
prefix=f"self_attn.kv_a_proj_with_mqa",
)
self.kv_a_layernorm = RMSNorm(self.kv_lora_rank, eps=config.rms_norm_eps)
self.kv_b_proj = ColumnParallelLinear(
@@ -455,6 +457,8 @@ class DeepseekV2AttentionMLA(nn.Module):
self.kv_lora_rank + self.qk_rope_head_dim,
bias=False,
quant_config=quant_config,
# FIXME: quick fix to skip quantization; the prefix below is hard-coded (the f-string has no placeholders)
prefix=f"self_attn.kv_a_proj_with_mqa",
)
self.kv_a_layernorm = RMSNorm(self.kv_lora_rank, eps=config.rms_norm_eps)