Update vllm to 0.6.3 (#1711) (#1720)

Co-authored-by: Ke Bao <ISPObaoke@163.com>
This commit is contained in:
Yineng Zhang
2024-10-19 20:45:41 -07:00
committed by GitHub
parent 12cad0feae
commit 8bee20f80b
9 changed files with 133 additions and 76 deletions

View File

@@ -250,7 +250,7 @@ class DeepseekV2Attention(nn.Module):
bias=False,
quant_config=quant_config,
)
-        rope_scaling["type"] = "deepseek_yarn"
+        rope_scaling["rope_type"] = "deepseek_yarn"
self.rotary_emb = get_rope(
qk_rope_head_dim,
rotary_dim=qk_rope_head_dim,
@@ -398,7 +398,7 @@ class DeepseekV2AttentionMLA(nn.Module):
bias=False,
quant_config=quant_config,
)
-        rope_scaling["type"] = "deepseek_yarn"
+        rope_scaling["rope_type"] = "deepseek_yarn"
self.rotary_emb = get_rope(
qk_rope_head_dim,
rotary_dim=qk_rope_head_dim,