Support NextN for FlashInfer MLA attention backend (#4218)

This commit is contained in:
Baizhou Zhang
2025-03-09 00:01:54 -08:00
committed by GitHub
parent 89ccb533ad
commit 9fb48f951f
5 changed files with 393 additions and 58 deletions

View File

@@ -555,6 +555,8 @@ class DeepseekV2AttentionMLA(nn.Module):
return (
not global_server_args_dict["flashinfer_mla_disable_ragged"]
and forward_batch.forward_mode.is_extend()
and not forward_batch.forward_mode.is_target_verify()
and not forward_batch.forward_mode.is_draft_extend()
and forward_batch.extend_prefix_lens.sum() == 0
)
else: