[Model][1/N] Delete deepseek v2/v3 modeling codes. (#3189)
This PR deletes model codes of deepseek_v2 and deepseek_v3 to reuse the model file from vLLM. vLLM Ascend now uses custom ops register way instead of model file hard-coding. - vLLM version: v0.11.0rc3 - vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0 --------- Signed-off-by: whx-sjtu <2952154980@qq.com>
This commit is contained in:
@@ -570,7 +570,8 @@ class TorchairDeepseekV2MLAAttention(DeepseekV2MLAAttention):
|
||||
qk_head_dim=self.qk_head_dim,
|
||||
v_head_dim=self.v_head_dim,
|
||||
rotary_emb=self.rotary_emb,
|
||||
q_proj=self.q_proj if self.q_lora_rank is None else self.q_b_proj,
|
||||
q_proj=self.q_proj if self.q_lora_rank is None else None,
|
||||
q_b_proj=self.q_b_proj if self.q_lora_rank is not None else None,
|
||||
kv_a_proj_with_mqa=self.kv_a_proj_with_mqa,
|
||||
kv_a_layernorm=self.kv_a_layernorm,
|
||||
kv_b_proj=self.kv_b_proj,
|
||||
|
||||
@@ -656,7 +656,8 @@ class AscendMLATorchairImpl(MLAAttentionImpl):
|
||||
self.qk_head_dim = kwargs['qk_head_dim']
|
||||
self.v_head_dim = kwargs['v_head_dim']
|
||||
self.rotary_emb = kwargs['rotary_emb']
|
||||
self.q_proj = kwargs['q_proj']
|
||||
self.q_proj = kwargs['q_proj'] if self.q_lora_rank is None else kwargs[
|
||||
'q_b_proj']
|
||||
self.kv_b_proj = kwargs['kv_b_proj']
|
||||
self.o_proj = kwargs['o_proj']
|
||||
self.kv_a_proj_with_mqa = kwargs.get('kv_a_proj_with_mqa', None)
|
||||
|
||||
Reference in New Issue
Block a user