Restructure sgl-kernel benchmark (#10861)
This commit is contained in:
@@ -97,7 +97,7 @@ def benchmark(num_tokens, num_experts, topk, provider):
|
||||
fn = lambda: sglang_topk_softmax(gating_output, topk)
|
||||
|
||||
quantiles = [0.5, 0.2, 0.8]
|
||||
- ms, min_ms, max_ms = triton.testing.do_bench(fn, quantiles=quantiles)
|
||||
+ ms, min_ms, max_ms = triton.testing.do_bench_cudagraph(fn, quantiles=quantiles)
|
||||
|
||||
return 1000 * ms, 1000 * max_ms, 1000 * min_ms
|
||||
|
||||
|
||||
Reference in New Issue
Block a user