From c68dfa70acf4afc4f7127859f2ee442c38b0be4a Mon Sep 17 00:00:00 2001
From: Wang Yixuan <88923622+hust17yixuan@users.noreply.github.com>
Date: Tue, 9 Dec 2025 16:55:09 +0800
Subject: [PATCH] [Bugfix] fix bmm_transpose ops in dsv32 (#4791)

### What this PR does / why we need it?
The bmm_transpose op cannot be used under CP (context parallelism), so add a guard in the modeling code: when SFA CP is enabled, `_v_up_proj` skips the fused `batch_matmul_transpose` path.

### Does this PR introduce _any_ user-facing change?
No

- vLLM version: v0.12.0
- vLLM main: https://github.com/vllm-project/vllm/commit/ad32e3e19ccf0526cb6744a5fed09a138a5fb2f9

Signed-off-by: hust17yixuan <303660421@qq.com>
---
 vllm_ascend/attention/sfa_v1.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/vllm_ascend/attention/sfa_v1.py b/vllm_ascend/attention/sfa_v1.py
index cc443f55..6ee35f8c 100644
--- a/vllm_ascend/attention/sfa_v1.py
+++ b/vllm_ascend/attention/sfa_v1.py
@@ -485,7 +485,8 @@ class AscendSFAImpl(MLAAttentionImpl):
 
     def _v_up_proj(self, x):
         if x.dtype in [torch.float16, torch.bfloat16] \
-            and hasattr(torch.ops._C_ascend, "batch_matmul_transpose"):
+            and hasattr(torch.ops._C_ascend, "batch_matmul_transpose") \
+            and not self.enable_sfa_cp:
             x = x.view(-1, self.num_heads, self.kv_lora_rank)
             b, _, _ = x.shape
             res = torch.empty((b, self.num_heads, self.v_head_dim),
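
For reviewers, a minimal standalone sketch of the dispatch pattern this guard implements. The dtype check, the `hasattr(torch.ops._C_ascend, "batch_matmul_transpose")` probe, and the `enable_sfa_cp` condition mirror the diff; the fused-op call signature, the `w_uv` weight shape, and the einsum fallback are illustrative assumptions, not the actual `sfa_v1.py` implementation:

```python
import torch

def v_up_proj_sketch(x: torch.Tensor,
                     w_uv: torch.Tensor,  # assumed shape [num_heads, kv_lora_rank, v_head_dim]
                     num_heads: int,
                     kv_lora_rank: int,
                     v_head_dim: int,
                     enable_sfa_cp: bool) -> torch.Tensor:
    # Take the fused path only when (a) dtype is fp16/bf16, (b) the Ascend
    # custom op is registered, and (c) SFA context parallelism is off -- the
    # fused bmm_transpose kernel does not support the CP layout (this PR's fix).
    use_fused = (
        x.dtype in (torch.float16, torch.bfloat16)
        and hasattr(torch.ops._C_ascend, "batch_matmul_transpose")
        and not enable_sfa_cp
    )
    x = x.view(-1, num_heads, kv_lora_rank)
    if use_fused:
        b = x.shape[0]
        res = torch.empty((b, num_heads, v_head_dim),
                          dtype=x.dtype, device=x.device)
        # Hypothetical out-variant signature; the real op's arguments may differ.
        torch.ops._C_ascend.batch_matmul_transpose(x, w_uv, res)
        out = res
    else:
        # Portable per-head up-projection; valid under CP as well.
        out = torch.einsum("bhl,hld->bhd", x, w_uv)
    return out.reshape(-1, num_heads * v_head_dim)
```

Gating on both capability (`hasattr`) and configuration (`enable_sfa_cp`) keeps the fused kernel strictly opt-in: if either check fails, execution falls through to a path that is correct regardless of the parallelism mode.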