[fix] fix cutlass_mla_backend with cuda_graph and add sm_scale for sgl-kernel cutlass_mla (#7184)
This commit is contained in:
@@ -95,6 +95,7 @@ def benchmark(batch_size, seq_len, provider, block_size, num_kv_splits):
         seq_lens,
         block_table,
         workspace,
         1.44,
         num_kv_splits,
     ),
     quantiles=quantiles,
Reference in New Issue
Block a user