[fix] fix cutlass_mla_backend with cuda_graph and add sm_scale for sgl-kernel cutlass_mla (#7184)

2025-06-15 03:45:41 +08:00
parent ed54bf9d19
commit ab1a4fa5cb
7 changed files with 29 additions and 17 deletions
--- a/sgl-kernel/benchmark/bench_cutlass_mla.py
+++ b/sgl-kernel/benchmark/bench_cutlass_mla.py
@@ -95,6 +95,7 @@ def benchmark(batch_size, seq_len, provider, block_size, num_kv_splits):
            seq_lens,
            block_table,
            workspace,
+            1.44,
            num_kv_splits,
        ),
        quantiles=quantiles,