Use int64 as indices for set_kv_buffer (#3039)

This commit is contained in:
Lianmin Zheng
2025-01-21 19:46:09 -08:00
committed by GitHub
parent a42213dbd4
commit 3d8f1c9bcf
6 changed files with 30 additions and 37 deletions

View File

@@ -99,10 +99,7 @@ class BenchArgs:
parser.add_argument("--correctness-test", action="store_true")
parser.add_argument("--cut-len", type=int, default=BenchArgs.cut_len)
parser.add_argument(
- "--profile",
- action="store_true",
- help="Use Torch Profiler. The endpoint must be launched with "
- "SGLANG_TORCH_PROFILER_DIR to enable profiler.",
+ "--profile", action="store_true", help="Use Torch Profiler."
)
parser.add_argument(
"--profile-filename-prefix",
@@ -381,6 +378,7 @@ def latency_test_run_once(
parent_dir = os.path.dirname(os.path.abspath(profile_filename))
os.makedirs(parent_dir, exist_ok=True)
profiler.export_chrome_trace(profile_filename)
+ rank_print(f"torch profiler chrome trace saved to {profile_filename}")
# Record decode timing from 2nd output
if output_len > 1:
@@ -451,7 +449,7 @@ def latency_test(
il,
ol,
server_args.device,
- bench_args.profile,
+ bench_args.profile if tp_rank == 0 else None,
bench_args.profile_filename_prefix,
)
if ret is not None: