[Feat][UT] Support Deepseekv32 FULL_DECODE_ONLY mode and add unit test of sfa_v1 (#3763)

### What this PR does / why we need it? - Add support for DeepSeek v3.2 in FULL_DECODE_ONLY mode. - Add unit test for sfa_v1. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - vLLM version: v0.11.0 - vLLM main: 83f478bb19 --------- Signed-off-by: 1Fire4 <wangdingyi2@huawei.com>
2025-11-03 10:02:47 +08:00
parent d4c75088a0
commit 0b9b6d79fe
3 changed files with 216 additions and 6 deletions
--- a/vllm_ascend/attention/sfa_v1.py
+++ b/vllm_ascend/attention/sfa_v1.py
@@ -91,7 +91,7 @@ M = TypeVar("M", bound=AscendSFAMetadata)
 class AscendSFAMetadataBuilder:
    # Does this backend/builder support ACL Graphs for attention (default: no).
    aclgraph_support: ClassVar[AttentionCGSupport] = \
-        AttentionCGSupport.NEVER
+        AttentionCGSupport.UNIFORM_SINGLE_TOKEN_DECODE
    """
    NOTE: Please read the comment at the top of the file before trying to
    understand this class
@@ -189,6 +189,26 @@ class AscendSFAMetadataBuilder:
            sin=sin,
            cos=cos)

+    def build_for_graph_capture(
+        self,
+        common_attn_metadata: AscendCommonAttentionMetadata,
+        attn_state: AscendAttentionState = AscendAttentionState.DecodeOnly,
+        model: Optional[nn.Module] = None,
+    ):
+        if attn_state == AscendAttentionState.DecodeOnly:
+            attn_metadata = self.build(
+                common_prefix_len=0,
+                common_attn_metadata=common_attn_metadata,
+                model=model,
+            )
+        else:
+            raise NotImplementedError(
+                "Currently we only support building dummy metadata for DecodeOnly state"
+            )
+
+        attn_metadata.attn_state = attn_state
+        return attn_metadata
+

 class AscendSFAImpl(MLAAttentionImpl):
    """