[Feat][UT] Support Deepseekv32 FULL_DECODE_ONLY mode and add unit test for sfa_v1 (#3763)
### What this PR does / why we need it?
- Add support for DeepSeek v3.2 in FULL_DECODE_ONLY mode.
- Add unit test for sfa_v1.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
- vLLM version: v0.11.0
- vLLM main:
83f478bb19
---------
Signed-off-by: 1Fire4 <wangdingyi2@huawei.com>
This commit is contained in:
@@ -91,7 +91,7 @@ M = TypeVar("M", bound=AscendSFAMetadata)
|
||||
class AscendSFAMetadataBuilder:
|
||||
# Does this backend/builder support ACL Graphs for attention (default: no).
|
||||
aclgraph_support: ClassVar[AttentionCGSupport] = \
|
||||
AttentionCGSupport.NEVER
|
||||
AttentionCGSupport.UNIFORM_SINGLE_TOKEN_DECODE
|
||||
"""
|
||||
NOTE: Please read the comment at the top of the file before trying to
|
||||
understand this class
|
||||
@@ -189,6 +189,26 @@ class AscendSFAMetadataBuilder:
|
||||
sin=sin,
|
||||
cos=cos)
|
||||
|
||||
def build_for_graph_capture(
    self,
    common_attn_metadata: AscendCommonAttentionMetadata,
    attn_state: AscendAttentionState = AscendAttentionState.DecodeOnly,
    model: Optional[nn.Module] = None,
):
    """Build (dummy) SFA attention metadata for ACL graph capture.

    Only the DecodeOnly attention state is supported; any other state
    raises NotImplementedError.

    Args:
        common_attn_metadata: Common attention metadata shared across backends.
        attn_state: Requested attention state; must be DecodeOnly.
        model: Optional model module forwarded to the regular build path.

    Returns:
        The metadata produced by ``self.build`` with ``attn_state`` stamped on.

    Raises:
        NotImplementedError: If ``attn_state`` is not DecodeOnly.
    """
    # Guard clause: reject every state except DecodeOnly up front.
    if attn_state != AscendAttentionState.DecodeOnly:
        raise NotImplementedError(
            "Currently we only support building dummy metadata for DecodeOnly state"
        )

    # Reuse the regular build path with no shared prefix, then record the
    # requested attention state on the resulting metadata.
    metadata = self.build(
        common_prefix_len=0,
        common_attn_metadata=common_attn_metadata,
        model=model,
    )
    metadata.attn_state = attn_state
    return metadata
|
||||
|
||||
|
||||
class AscendSFAImpl(MLAAttentionImpl):
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user