From d7db6791e79b2c914c3babc688f3788c0e72f1d6 Mon Sep 17 00:00:00 2001
From: Zhu Yi Lin <116337067+GDzhu01@users.noreply.github.com>
Date: Wed, 10 Dec 2025 20:45:07 +0800
Subject: [PATCH] [Bugfix] Support for mlapo in deepseekv3.1 w4a8 (#4828)

### What this PR does / why we need it?
Support mlapo in deepseekv3.1 w4a8. The mlapo csrc kernel requires the input
args `enable_inner_out` and `inner_out`, so we pass dummy values for them
here. We also fall back to a zero bias for `beta1`, so the path works when
`q_a_layernorm` carries no bias.

- vLLM version: v0.12.0
- vLLM main: https://github.com/vllm-project/vllm/commit/ad32e3e19ccf0526cb6744a5fed09a138a5fb2f9

Signed-off-by: GDzhu01 <809721801@qq.com>
Co-authored-by: Mengqing Cao
---
 vllm_ascend/attention/mla_v1.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/vllm_ascend/attention/mla_v1.py b/vllm_ascend/attention/mla_v1.py
index 348efc33..5e68b52e 100644
--- a/vllm_ascend/attention/mla_v1.py
+++ b/vllm_ascend/attention/mla_v1.py
@@ -1064,7 +1064,8 @@ class AscendMLAImpl(MLAAttentionImpl):
         device = self.q_proj.weight.device
 
         self.gamma1 = self.q_a_layernorm.weight.data
-        self.beta1 = self.q_a_layernorm.bias.data
+        self.beta1 = torch.zeros_like(self.gamma1) if (
+            _bias := self.q_a_layernorm.bias) is None else _bias.data
         self.gamma2 = self.kv_a_layernorm.weight.data
         self.quant_scale0 = self.fused_qkv_a_proj.input_scale.data
         self.quant_offset0 = self.fused_qkv_a_proj.input_offset.data
@@ -1460,7 +1461,8 @@ class AscendMLAImpl(MLAAttentionImpl):
             kv_cache_out0=decode_k_nope,
             q_out1=decode_q_pe,
             kv_cache_out1=decode_k_pe,
-        )
+            enable_inner_out=False,
+            inner_out=torch.tensor([], device=hidden_states.device))
         decode_q_nope = decode_q_nope.view(bsz, self.num_heads,
                                            self.kv_lora_rank)
         decode_q_pe = decode_q_pe.view(bsz, self.num_heads, -1)
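
Note: below is a minimal, self-contained sketch of the two patterns this
patch relies on: the zero-bias fallback for `beta1`, and the dummy
`enable_inner_out`/`inner_out` arguments. `fused_mla_preprocess` is a
hypothetical stand-in for the mlapo csrc entry point, not the real Ascend
kernel API; only the calling convention mirrors the patch.

```python
import torch
import torch.nn as nn


def fused_mla_preprocess(x, gamma1, beta1, *, enable_inner_out, inner_out):
    """Hypothetical stand-in for the mlapo csrc kernel.

    The real kernel's signature requires enable_inner_out/inner_out even
    when the caller never consumes the inner output, which is why the
    patch passes dummy values.
    """
    out = torch.nn.functional.layer_norm(x, gamma1.shape, gamma1, beta1)
    if enable_inner_out:
        inner_out.copy_(out)  # only written when the flag is set
    return out


# A w4a8 checkpoint can ship a bias-free q_a_layernorm (bias is None).
q_a_layernorm = nn.LayerNorm(8, bias=False)

gamma1 = q_a_layernorm.weight.data
# Zero-bias fallback from the patch: beta1 must always be a real tensor.
beta1 = torch.zeros_like(gamma1) if (
    _bias := q_a_layernorm.bias) is None else _bias.data

x = torch.randn(2, 8)
# Dummy args: inner_out is ignored because enable_inner_out is False.
y = fused_mla_preprocess(x,
                         gamma1,
                         beta1,
                         enable_inner_out=False,
                         inner_out=torch.tensor([], device=x.device))
print(y.shape)  # torch.Size([2, 8])
```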