Co-authored-by: Ke Bao <ISPObaoke@163.com>
This commit is contained in:
@@ -250,7 +250,7 @@ class DeepseekV2Attention(nn.Module):
|
||||
bias=False,
|
||||
quant_config=quant_config,
|
||||
)
|
||||
-rope_scaling["type"] = "deepseek_yarn"
|
||||
+rope_scaling["rope_type"] = "deepseek_yarn"
|
||||
self.rotary_emb = get_rope(
|
||||
qk_rope_head_dim,
|
||||
rotary_dim=qk_rope_head_dim,
|
||||
@@ -398,7 +398,7 @@ class DeepseekV2AttentionMLA(nn.Module):
|
||||
bias=False,
|
||||
quant_config=quant_config,
|
||||
)
|
||||
-rope_scaling["type"] = "deepseek_yarn"
|
||||
+rope_scaling["rope_type"] = "deepseek_yarn"
|
||||
self.rotary_emb = get_rope(
|
||||
qk_rope_head_dim,
|
||||
rotary_dim=qk_rope_head_dim,
|
||||
|
||||
Reference in New Issue
Block a user