diff --git a/vllm_ascend/attention/mla_v1.py b/vllm_ascend/attention/mla_v1.py
index 348efc33..5e68b52e 100644
--- a/vllm_ascend/attention/mla_v1.py
+++ b/vllm_ascend/attention/mla_v1.py
@@ -1064,7 +1064,8 @@ class AscendMLAImpl(MLAAttentionImpl):
 
         device = self.q_proj.weight.device
         self.gamma1 = self.q_a_layernorm.weight.data
-        self.beta1 = self.q_a_layernorm.bias.data
+        self.beta1 = torch.zeros_like(self.gamma1) if (
+            _bias := self.q_a_layernorm.bias) is None else _bias.data
         self.gamma2 = self.kv_a_layernorm.weight.data
         self.quant_scale0 = self.fused_qkv_a_proj.input_scale.data
         self.quant_offset0 = self.fused_qkv_a_proj.input_offset.data
@@ -1460,7 +1461,8 @@ class AscendMLAImpl(MLAAttentionImpl):
             kv_cache_out0=decode_k_nope,
             q_out1=decode_q_pe,
             kv_cache_out1=decode_k_pe,
-        )
+            enable_inner_out=False,
+            inner_out=torch.tensor([], device=hidden_states.device))
         decode_q_nope = decode_q_nope.view(bsz, self.num_heads,
                                            self.kv_lora_rank)
         decode_q_pe = decode_q_pe.view(bsz, self.num_heads, -1)