From 8b3a7a9e874c484a28efe89ce25b47e6eb809349 Mon Sep 17 00:00:00 2001 From: cookieyyds <126683903+cookieyyds@users.noreply.github.com> Date: Thu, 8 Jan 2026 15:47:31 +0800 Subject: [PATCH] [bugfix] Support dsv3.2 enable both mtp and full_decode_only (#5679) ### What this PR does / why we need it? PR #5230 introduced a problem: when both mtp and full_decode_only are enabled for the DSV32 model, the operators cannot be compiled into the graph. This PR fixes that issue. - vLLM version: v0.13.0 - vLLM main: https://github.com/vllm-project/vllm/commit/2f4e6548efec402b913ffddc8726230d9311948d Signed-off-by: cookieyyds <126683903+cookieyyds@users.noreply.github.com> --- vllm_ascend/attention/sfa_v1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm_ascend/attention/sfa_v1.py b/vllm_ascend/attention/sfa_v1.py index 1bfe8e67..c4a2a51e 100644 --- a/vllm_ascend/attention/sfa_v1.py +++ b/vllm_ascend/attention/sfa_v1.py @@ -167,7 +167,7 @@ class AscendSFAMetadataBuilder(MLACommonMetadataBuilder[AscendSFAMetadata]): ) -> AttentionCGSupport: # Explicit override in case the underlying builder specialized this getter. # @override omitted only because of mypy limitation due to type variable. - return AttentionCGSupport.UNIFORM_SINGLE_TOKEN_DECODE + return AttentionCGSupport.UNIFORM_BATCH def reorder_batch(self, input_batch: "NPUInputBatch", scheduler_output: "SchedulerOutput") -> bool: