Fix sgl-kernel benchmark dead code (#11022)

This commit is contained in:
Xiaoyu Zhang
2025-09-29 15:06:40 +08:00
committed by GitHub
parent 71959545df
commit 11965b0daf
25 changed files with 1019 additions and 260 deletions

View File

@@ -1,6 +1,7 @@
import argparse
import copy
import itertools
import os
import torch
import triton
@@ -10,6 +11,12 @@ from sgl_kernel import (
qserve_w4a8_per_group_gemm,
)
# CI environment detection
IS_CI = (
os.getenv("CI", "false").lower() == "true"
or os.getenv("GITHUB_ACTIONS", "false").lower() == "true"
)
def to_int8(tensor: torch.Tensor) -> torch.Tensor:
return torch.round(tensor.clamp(min=-128, max=127)).to(dtype=torch.int8)
@@ -65,10 +72,17 @@ WEIGHT_SHAPES = {
}
# CI environment uses simplified parameters
if IS_CI:
batch_sizes = [1, 16] # Simplified for CI
else:
batch_sizes = [1, 16, 32, 64, 128, 256, 512, 1024, 2048]
@triton.testing.perf_report(
triton.testing.Benchmark(
x_names=["batch_size"],
x_vals=[1, 16, 32, 64, 128, 256, 512, 1024, 2048],
x_vals=batch_sizes,
x_log=False,
line_arg="provider",
line_vals=["FP16", "W8A8", "Qserve_W4A8_Per_Channel", "Qserve_W4A8_Per_Group"],
@@ -184,13 +198,19 @@ if __name__ == "__main__":
)
args = parser.parse_args()
KN_model_names = prepare_shapes(args)
for K, N, model_name in KN_model_names:
print(f"{model_name} N={N} K={K}: ")
benchmark.run(
print_data=True,
N=N,
K=K,
)
# Skip in CI environment
if IS_CI:
print("Skipping QServe W4A8 GEMM benchmark in CI environment")
print("QServe operations may have compatibility issues in CI")
else:
KN_model_names = prepare_shapes(args)
print("Benchmark finished!")
for K, N, model_name in KN_model_names:
print(f"{model_name} N={N} K={K}: ")
benchmark.run(
print_data=True,
N=N,
K=K,
)
print("Benchmark finished!")