Add Cutlass MLA attention backend (#5390)

2025-04-27 20:58:53 -07:00
parent 40d9b8acce
commit 84810da4ae
7 changed files with 305 additions and 3 deletions
--- a/python/sglang/srt/managers/schedule_batch.py
+++ b/python/sglang/srt/managers/schedule_batch.py
@@ -1515,6 +1515,7 @@ class ScheduleBatch(ScheduleBatchDisaggregationDecodeMixin):
            )
            or global_server_args_dict["attention_backend"] == "flashmla"
            or global_server_args_dict["attention_backend"] == "fa3"
+            or global_server_args_dict["attention_backend"] == "cutlass_mla"
        ):
            seq_lens_cpu = self.seq_lens.cpu()
        else: