[Bugfix] Fix bmm_transpose ops in dsv32 (#4791)
### What this PR does / why we need it?
The fused bmm_transpose op cannot be used under context parallelism (CP), so this PR adds a guard in the modeling code that skips the op when SFA CP is enabled.
### Does this PR introduce _any_ user-facing change?
No
- vLLM version: v0.12.0
- vLLM main: ad32e3e19c
Signed-off-by: hust17yixuan <303660421@qq.com>
```diff
@@ -485,7 +485,8 @@ class AscendSFAImpl(MLAAttentionImpl):
     def _v_up_proj(self, x):
         if x.dtype in [torch.float16, torch.bfloat16] \
-                and hasattr(torch.ops._C_ascend, "batch_matmul_transpose"):
+                and hasattr(torch.ops._C_ascend, "batch_matmul_transpose") \
+                and not self.enable_sfa_cp:
             x = x.view(-1, self.num_heads, self.kv_lora_rank)
             b, _, _ = x.shape
             res = torch.empty((b, self.num_heads, self.v_head_dim),
```
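For readers without an NPU build at hand, below is a minimal sketch of the gating logic this patch introduces, extracted into a standalone helper. The helper name `can_use_fused_bmm_transpose` is hypothetical; the dtype check, the `hasattr` probe for the custom op, and the `enable_sfa_cp` flag all come from the diff above.

```python
import torch


def can_use_fused_bmm_transpose(x: torch.Tensor, enable_sfa_cp: bool) -> bool:
    """Sketch of the guard in AscendSFAImpl._v_up_proj: take the fused
    batch_matmul_transpose path only for half-precision inputs, when the
    Ascend custom op is registered, and when SFA context parallelism is
    off (the new condition added by this fix)."""
    return (
        x.dtype in (torch.float16, torch.bfloat16)
        # hasattr probes the lazily created torch.ops namespace; on builds
        # without the Ascend extension the op is absent and this is False.
        and hasattr(torch.ops._C_ascend, "batch_matmul_transpose")
        # The fix: the fused op is incompatible with CP, so skip it there.
        and not enable_sfa_cp
    )


# Example: with CP enabled the fused path is always rejected, so the
# model falls through to the generic (CP-safe) up-projection path.
x = torch.randn(4, 16, 512, dtype=torch.bfloat16)
assert can_use_fused_bmm_transpose(x, enable_sfa_cp=True) is False
```

Putting the CP condition in the dispatch guard, rather than inside the kernel, leaves the existing non-fused fallback path to handle CP runs unchanged.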