diff --git a/vllm_ascend/attention/sfa_v1.py b/vllm_ascend/attention/sfa_v1.py
index cc443f55..6ee35f8c 100644
--- a/vllm_ascend/attention/sfa_v1.py
+++ b/vllm_ascend/attention/sfa_v1.py
@@ -485,7 +485,8 @@ class AscendSFAImpl(MLAAttentionImpl):
 
     def _v_up_proj(self, x):
         if x.dtype in [torch.float16, torch.bfloat16] \
-            and hasattr(torch.ops._C_ascend, "batch_matmul_transpose"):
+            and hasattr(torch.ops._C_ascend, "batch_matmul_transpose") \
+            and not self.enable_sfa_cp:
             x = x.view(-1, self.num_heads, self.kv_lora_rank)
             b, _, _ = x.shape
             res = torch.empty((b, self.num_heads, self.v_head_dim),