Blackwell Cutlass MLA kernel (#5142)

2025-04-11 22:16:51 -07:00
parent 5ad0571903
commit f65b8d5c89
7 changed files with 371 additions and 3 deletions
--- a/sgl-kernel/python/sgl_kernel/init.py
+++ b/sgl-kernel/python/sgl_kernel/init.py
@@ -11,7 +11,11 @@ if os.path.exists("/usr/local/cuda/targets/x86_64-linux/lib/libcudart.so.12"):

 from sgl_kernel import common_ops
 from sgl_kernel.allreduce import *
-from sgl_kernel.attention import lightning_attention_decode
+from sgl_kernel.attention import (
+    cutlass_mla_decode,
+    cutlass_mla_get_workspace_size,
+    lightning_attention_decode,
+)
 from sgl_kernel.elementwise import (
    apply_rope_with_cos_sin_cache_inplace,
    fused_add_rmsnorm,