Blackwell Cutlass MLA kernel (#5142)

This commit is contained in:
Trevor Morris
2025-04-11 22:16:51 -07:00
committed by GitHub
parent 5ad0571903
commit f65b8d5c89
7 changed files with 371 additions and 3 deletions

View File

@@ -11,7 +11,11 @@ if os.path.exists("/usr/local/cuda/targets/x86_64-linux/lib/libcudart.so.12"):
 from sgl_kernel import common_ops
 from sgl_kernel.allreduce import *
-from sgl_kernel.attention import lightning_attention_decode
+from sgl_kernel.attention import (
+    cutlass_mla_decode,
+    cutlass_mla_get_workspace_size,
+    lightning_attention_decode,
+)
 from sgl_kernel.elementwise import (
     apply_rope_with_cos_sin_cache_inplace,
     fused_add_rmsnorm,