From 8b3a7a9e874c484a28efe89ce25b47e6eb809349 Mon Sep 17 00:00:00 2001
From: cookieyyds <126683903+cookieyyds@users.noreply.github.com>
Date: Thu, 8 Jan 2026 15:47:31 +0800
Subject: [PATCH] [bugfix] Support dsv3.2 enable both mtp and full_decode_only
 (#5679)

### What this PR does / why we need it?
#5230 this PR introduced a problem when both mtp and full_decode_only
are enabled for the DSV32 model, the operators cannot be compiled into
the graph. This PR fixes that issue.

- vLLM version: v0.13.0
- vLLM main:
https://github.com/vllm-project/vllm/commit/2f4e6548efec402b913ffddc8726230d9311948d

Signed-off-by: cookieyyds <126683903+cookieyyds@users.noreply.github.com>
---
 vllm_ascend/attention/sfa_v1.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm_ascend/attention/sfa_v1.py b/vllm_ascend/attention/sfa_v1.py
index 1bfe8e67..c4a2a51e 100644
--- a/vllm_ascend/attention/sfa_v1.py
+++ b/vllm_ascend/attention/sfa_v1.py
@@ -167,7 +167,7 @@ class AscendSFAMetadataBuilder(MLACommonMetadataBuilder[AscendSFAMetadata]):
     ) -> AttentionCGSupport:
         # Explicit override in case the underlying builder specialized this getter.
         # @override omitted only because of mypy limitation due to type variable.
-        return AttentionCGSupport.UNIFORM_SINGLE_TOKEN_DECODE
+        return AttentionCGSupport.UNIFORM_BATCH
 
     def reorder_batch(self, input_batch: "NPUInputBatch",
                       scheduler_output: "SchedulerOutput") -> bool: