Add Cutlass MLA attention backend (#5390)

This commit is contained in:
Trevor Morris
2025-04-27 20:58:53 -07:00
committed by GitHub
parent 40d9b8acce
commit 84810da4ae
7 changed files with 305 additions and 3 deletions

View File

@@ -1515,6 +1515,7 @@ class ScheduleBatch(ScheduleBatchDisaggregationDecodeMixin):
)
or global_server_args_dict["attention_backend"] == "flashmla"
or global_server_args_dict["attention_backend"] == "fa3"
or global_server_args_dict["attention_backend"] == "cutlass_mla"
):
seq_lens_cpu = self.seq_lens.cpu()
else: